Exemple #1
0
    def test_matrix2tree(self):
        """Run ``matrix2tree`` on a 2x2 matrix, with and without file output.

        The first call passes ``filename`` and the test then expects a file
        with the ``.nwk`` suffix appended to that name; the second call only
        checks that ``tree_calc='upgma'`` runs without raising.
        """
        from lingpy.algorithm.clustering import matrix2tree

        base_name = text_type(self.tmp_path('t'))
        expected_file = base_name + '.nwk'

        matrix2tree(
            [[1, 0], [0, 1]],
            ['a', 'b'],
            filename=base_name)
        assert os.path.exists(expected_file)

        # Fresh literals on purpose: each call gets its own matrix and taxa.
        matrix2tree(
            [[1, 0], [0, 1]],
            ['a', 'b'],
            tree_calc='upgma')
    def test_matrix2tree(self):
        """Check Newick file creation and a UPGMA run of ``matrix2tree``.

        A 2x2 matrix with taxa ``a`` and ``b`` is clustered twice: once with
        a ``filename`` (a ``.nwk`` file is then expected on disk) and once
        with ``tree_calc='upgma'`` and no file output.
        """
        from lingpy.algorithm.clustering import matrix2tree

        target = text_type(self.tmp_path('t'))

        # With a file name, the tree is expected at "<target>.nwk".
        matrix2tree([[1, 0], [0, 1]], ['a', 'b'], filename=target)
        nwk_path = target + '.nwk'
        assert os.path.exists(nwk_path)

        # Without a file name the call only has to complete.
        matrix2tree([[1, 0], [0, 1]], ['a', 'b'], tree_calc='upgma')
Exemple #3
0
def test_matrix2tree(tmppath, matrix, taxa):
    """Exercise ``matrix2tree`` with file output and each tree method.

    Expects a ``.nwk`` file next to the requested base name, runs the
    ``upgma`` and ``neighbor`` methods, and expects ``ValueError`` for an
    unrecognized ``tree_calc`` value.
    """
    base = tmppath / 't'
    matrix2tree(matrix, taxa, filename=str(base))
    nwk_file = base.parent.joinpath(base.name + '.nwk')
    assert nwk_file.exists()

    for method in ('upgma', 'neighbor'):
        matrix2tree(matrix, taxa, tree_calc=method)

    with pytest.raises(ValueError):
        matrix2tree(matrix, taxa, tree_calc="dummy")
Exemple #4
0
    def test_matrix2tree(self):
        """Exercise ``matrix2tree`` on the fixture matrix and taxa.

        Covers file output (a ``.nwk`` file is expected), the ``upgma`` and
        ``neighbor`` methods, and the ``ValueError`` expected for an
        unrecognized ``tree_calc`` value.
        """
        base_name = text_type(self.tmp_path('t'))
        matrix, taxa = self.matrix, self.taxa

        matrix2tree(matrix, taxa, filename=base_name)
        assert os.path.exists(base_name + '.nwk')

        for method in ('upgma', 'neighbor'):
            matrix2tree(matrix, taxa, tree_calc=method)

        assert_raises(
            ValueError, matrix2tree, matrix, taxa, tree_calc="dummy")
Exemple #5
0
    def test_matrix2tree(self):
        """Exercise ``matrix2tree`` on the fixture matrix and taxa.

        Checks that passing ``filename`` leaves a ``.nwk`` file on disk, that
        the ``upgma`` and ``neighbor`` methods run, and that an unrecognized
        ``tree_calc`` value raises ``ValueError``.
        """
        from lingpy.algorithm.clustering import matrix2tree

        out_base = text_type(self.tmp_path('t'))
        matrix, taxa = self.matrix, self.taxa

        matrix2tree(matrix, taxa, filename=out_base)
        written = out_base + '.nwk'
        assert os.path.exists(written)

        for method in ('upgma', 'neighbor'):
            matrix2tree(matrix, taxa, tree_calc=method)

        assert_raises(
            ValueError, matrix2tree, matrix, taxa, tree_calc="dummy")
Exemple #6
0
def calculate_data(wordlist,
                   data,
                   taxa='taxa',
                   concepts='concepts',
                   ref='cogid',
                   **keywords):
    """
    Manipulate a wordlist object by adding different kinds of data.

    The result of each calculation is stored in ``wordlist._meta``; the
    function itself returns None.

    Parameters
    ----------
    wordlist : object
        The wordlist that is modified in place. The attribute named by
        *taxa*, the ``_meta`` dictionary and (for "diversity")
        ``get_etymdict``, ``height`` and ``len()`` are used.
    data : str
        The type of data that shall be calculated. Currently supports

        * "tree" (also "tre", "nwk"): calculate a reference tree based on
          shared cognates, stored as ``_meta['tree']``
        * "dst" (also "distances"): get distances between taxa based on
          shared cognates, stored as ``_meta['distances']``
        * "cluster" (also "groups"): cluster the taxa into groups using
          different methods, stored as ``_meta['groups']``
        * "div" (also "diversity"): store
          ``(len(etymdict) - height) / (len(wordlist) - height)`` as
          ``_meta['diversity']``

        Any other value is reported with a warning and nothing is
        calculated.
    taxa : str (default="taxa")
        Name of the wordlist attribute that holds the taxa.
    concepts : str (default="concepts")
        Passed on to the distance calculation.
    ref : str (default="cogid")
        Passed on to the distance calculation and to ``get_etymdict``.
    keywords : dict
        Defaults are filled in for ``distances`` (False), ``tree_calc``
        ("upgma"), ``cluster`` ("upgma"), ``force`` (False), ``threshold``
        (0.5) and ``cluster_method`` ("upgma"). An existing tree or grouping
        is only overwritten when ``force`` is true. All keywords are also
        forwarded to the distance calculation.

    """
    logger = log.get_logger()
    util.setdefaults(keywords,
                     distances=False,
                     tree_calc="upgma",
                     cluster="upgma",
                     force=False,
                     threshold=0.5,
                     cluster_method='upgma')

    # get taxa for current calculation; a plain attribute lookup avoids
    # evaluating the caller-supplied name as Python code
    these_taxa = getattr(wordlist, taxa)

    # calculate distances
    if data in ['distances', 'dst']:
        wordlist._meta['distances'] = wl2dst(wordlist, taxa, concepts, ref,
                                             **keywords)
    elif data in ['diversity', 'div']:
        etd = wordlist.get_etymdict(ref=ref)
        wordlist._meta['diversity'] = \
            (len(etd) - wordlist.height) / (len(wordlist) - wordlist.height)
    elif data in ['tre', 'tree', 'nwk']:
        # the distances are cached in the wordlist before the tree check, so
        # they are available afterwards even if the tree is not rebuilt
        if 'distances' not in wordlist._meta:
            wordlist._meta['distances'] = \
                wl2dst(wordlist, taxa, concepts, ref, **keywords)
        distances = wordlist._meta['distances']
        if 'tree' in wordlist._meta and not keywords['force']:
            logger.warning("Reference tree has already been calculated, "
                           "force overwrite by "
                           "setting 'force' to 'True'.")
            return
        wordlist._meta['tree'] = clustering.matrix2tree(
            distances, these_taxa, keywords['tree_calc'],
            keywords['distances'])

    elif data in ['groups', 'cluster']:
        # unlike the tree branch, freshly computed distances are not cached
        if 'distances' not in wordlist._meta:
            distances = wl2dst(wordlist, taxa, concepts, ref, **keywords)
        else:
            distances = wordlist._meta['distances']
        if 'groups' in wordlist._meta and not keywords['force']:
            logger.warning("Groups have already been calculated, "
                           "force overwrite by "
                           "setting 'force' to 'True'.")
            return
        wordlist._meta['groups'] = clustering.matrix2groups(
            keywords['threshold'], distances, these_taxa,
            keywords['cluster_method'])
    else:
        # do not report success for a request that matched no branch
        logger.warning(
            "Unknown data type '{0}', nothing was calculated.".format(data))
        return
    log.info("Successfully calculated {0}.".format(data))
Exemple #7
0
def calculate_data(
        wordlist,
        data,
        taxa='taxa',
        concepts='concepts',
        ref='cogid',
        **keywords):
    """
    Manipulate a wordlist object by adding different kinds of data.

    The result of each calculation is stored in ``wordlist._meta``; the
    function itself returns None.

    Parameters
    ----------
    wordlist : object
        The wordlist that is modified in place. The attribute named by
        *taxa*, the ``_meta`` dictionary and (for "diversity")
        ``get_etymdict``, ``height`` and ``len()`` are used.
    data : str
        The type of data that shall be calculated. Currently supports

        * "tree" (also "tre", "nwk"): calculate a reference tree based on
          shared cognates, stored as ``_meta['tree']``
        * "dst" (also "distances"): get distances between taxa based on
          shared cognates, stored as ``_meta['distances']``
        * "cluster" (also "groups"): cluster the taxa into groups using
          different methods, stored as ``_meta['groups']``
        * "div" (also "diversity"): store
          ``(len(etymdict) - height) / (len(wordlist) - height)`` as
          ``_meta['diversity']``

        Any other value is reported with a warning and nothing is
        calculated.
    taxa : str (default="taxa")
        Name of the wordlist attribute that holds the taxa.
    concepts : str (default="concepts")
        Passed on to the distance calculation.
    ref : str (default="cogid")
        Passed on to the distance calculation and to ``get_etymdict``.
    keywords : dict
        Defaults are filled in for ``distances`` (False), ``tree_calc``
        ("upgma"), ``cluster`` ("upgma"), ``force`` (False), ``threshold``
        (0.5) and ``cluster_method`` ("upgma"). An existing tree or grouping
        is only overwritten when ``force`` is true. All keywords are also
        forwarded to the distance calculation.

    """
    logger = log.get_logger()
    util.setdefaults(
        keywords,
        distances=False,
        tree_calc="upgma",
        cluster="upgma",
        force=False,
        threshold=0.5,
        cluster_method='upgma')

    # get taxa for current calculation; a plain attribute lookup avoids
    # evaluating the caller-supplied name as Python code
    these_taxa = getattr(wordlist, taxa)

    # calculate distances
    if data in ['distances', 'dst']:
        wordlist._meta['distances'] = wl2dst(
                wordlist, taxa, concepts, ref, **keywords)
    elif data in ['diversity', 'div']:
        etd = wordlist.get_etymdict(ref=ref)
        wordlist._meta['diversity'] = \
            (len(etd) - wordlist.height) / (len(wordlist) - wordlist.height)
    elif data in ['tre', 'tree', 'nwk']:
        # the distances are cached in the wordlist before the tree check, so
        # they are available afterwards even if the tree is not rebuilt
        if 'distances' not in wordlist._meta:
            wordlist._meta['distances'] = \
                wl2dst(wordlist, taxa, concepts, ref, **keywords)
        distances = wordlist._meta['distances']
        if 'tree' in wordlist._meta and not keywords['force']:
            # Logger.warn is a deprecated alias; use Logger.warning
            logger.warning(
                    "Reference tree has already been calculated, "
                    "force overwrite by "
                    "setting 'force' to 'True'.")
            return
        wordlist._meta['tree'] = clustering.matrix2tree(
            distances, these_taxa, keywords['tree_calc'],
            keywords['distances'])

    elif data in ['groups', 'cluster']:
        # unlike the tree branch, freshly computed distances are not cached
        if 'distances' not in wordlist._meta:
            distances = wl2dst(wordlist, taxa, concepts, ref, **keywords)
        else:
            distances = wordlist._meta['distances']
        if 'groups' in wordlist._meta and not keywords['force']:
            logger.warning(
                    "Groups have already been calculated, "
                    "force overwrite by "
                    "setting 'force' to 'True'.")
            return
        wordlist._meta['groups'] = clustering.matrix2groups(
            keywords['threshold'], distances, these_taxa,
            keywords['cluster_method'])
    else:
        # do not report success for a request that matched no branch
        logger.warning(
                "Unknown data type '{0}', nothing was calculated.".format(
                    data))
        return
    log.info("Successfully calculated {0}.".format(data))