Ejemplo n.º 1
0
    def test_unzip(self):
        """Round trip: unzip applied to zip(*items) should give items back"""
        # three fixtures: single characters, integers and whole strings
        originals = [
            [list('abcde'), list('ghijk')],
            [[1, 2, 3, 4, 5], [0, 0, 0, 0, 0]],
            [["abcde", "fghij", "klmno"], ['xxxxx'] * 3],
        ]

        # every fixture is round-tripped before any assertion is made
        round_tripped = [unzip(zip(*group)) for group in originals]

        for observed, expected in zip(round_tripped, originals):
            self.assertEqual(observed, expected)
Ejemplo n.º 2
0
    def test_unzip(self):
        """Round trip: unzip applied to zip(*items) should give items back"""
        # three fixtures: single characters, integers and whole strings
        originals = [
            [list('abcde'), list('ghijk')],
            [[1, 2, 3, 4, 5], [0, 0, 0, 0, 0]],
            [["abcde", "fghij", "klmno"], ['xxxxx'] * 3],
        ]

        # every fixture is round-tripped before any assertion is made
        round_tripped = [unzip(zip(*group)) for group in originals]

        for observed, expected in zip(round_tripped, originals):
            self.assertEqual(observed, expected)
Ejemplo n.º 3
0
def get_polyphyletic(cons):
    """Collect, per named group, one representative tip for each parent

    ``cons`` is a mapping; its items are split with ``unzip`` into tips
    (the keys) and taxon strings (the values), which are used to build a
    consensus tree.

    Returns a dict keyed by ``(node.name, node.Rank)``.  Each value is a
    dict mapping the name of a parent under which the group appears to the
    first cached tip name of the corresponding node.  Unnamed internal
    nodes are ignored; a named node without a parent still gets an (empty)
    entry.
    """
    tips, taxonstrings = unzip(cons.items())
    # the second value returned by make_consensus_tree is not needed here
    tree, _ = make_consensus_tree(taxonstrings, False, tips=tips)
    cache_tipnames(tree)

    groups = {}
    for node in tree.non_tips():
        if node.name is None:
            continue

        # one entry per (name, rank), created the first time it is seen
        by_parent = groups.setdefault((node.name, node.Rank), {})

        parent = node.parent
        if parent is not None:
            by_parent[parent.name] = node.tip_names[0]

    return groups
Ejemplo n.º 4
0
def get_polyphyletic(cons):
    """Collect, per named group, one representative tip for each parent

    ``cons`` is a mapping; its items are split with ``unzip`` into tips
    (the keys) and taxon strings (the values), which are used to build a
    consensus tree.

    Returns a dict keyed by ``(node.name, node.Rank)``.  Each value is a
    dict mapping the name of a parent under which the group appears to the
    first cached tip name of the corresponding node.  Unnamed internal
    nodes are ignored; a named node without a parent still gets an (empty)
    entry.
    """
    tips, taxonstrings = unzip(cons.items())
    # the second value returned by make_consensus_tree is not needed here
    tree, _ = make_consensus_tree(taxonstrings, False, tips=tips)
    cache_tipnames(tree)

    groups = {}
    for node in tree.non_tips():
        if node.name is None:
            continue

        # one entry per (name, rank), created the first time it is seen
        by_parent = groups.setdefault((node.name, node.Rank), {})

        parent = node.parent
        if parent is not None:
            by_parent[parent.name] = node.tip_names[0]

    return groups
Ejemplo n.º 5
0
def name_node_score_fold(tree, score_f=fmeasure, tiebreak_f=min_tips,
                         verbose=False):
    """Score each (rank, name) on internal nodes and keep one node per name

    The tree is walked a single time while a dict of candidate
    ``(node, score)`` pairs is accumulated per rank and name.  The winning
    node for every name is then chosen from that dict, so no further tree
    lookups are needed.

    Parameters
    ----------
    tree
        Object exposing ``non_tips(include_self=True)``.  Each yielded node
        must provide ``RankNames``, ``ValidRelFreq`` and
        ``ConsensusRelFreq``.
    score_f
        Callable invoked as ``score_f(precision, recall)``.
    tiebreak_f
        Callable that receives the candidate list for a tied name (``None``
        in the positions of nodes that are not part of the tie) and returns
        the node to keep.
    verbose
        When true, a start-up message is printed.

    Returns
    -------
    dict
        Maps every rank index to a list of ``(name, best_score)`` tuples.

    Notes
    -----
    Nodes are modified in place: ``RankNameScores`` is reset on every
    visited node, and ``RankNames[rank]`` is set to ``None`` on each node
    that is not selected for a name.
    """
    if verbose:
        print("Starting name_node_score_fold...")

    n_ranks = len(RANK_ORDER)
    candidates = {rank_idx: {} for rank_idx in range(n_ranks)}

    # pass 1: score every named rank of every internal node
    for internal in tree.non_tips(include_self=True):
        internal.RankNameScores = [None] * n_ranks

        for rank_idx, label in enumerate(internal.RankNames):
            if label is not None:
                # precision: share of the informative descending tips that
                # carry this name
                precision = internal.ValidRelFreq[rank_idx][label]

                # recall: tips with this name below the node, relative to
                # all tips in the tree with this name
                recall = internal.ConsensusRelFreq[rank_idx][label]

                # the score is also stored on the node, at the matching
                # rank position, so it can be inspected later
                value = score_f(precision, recall)
                internal.RankNameScores[rank_idx] = value

                per_label = candidates[rank_idx].setdefault(label, [])
                per_label.append((internal, value))

    # pass 2: for each name keep a single node and blank the others
    used_scores = {}
    for rank_idx, by_label in candidates.items():
        best_for_rank = []
        used_scores[rank_idx] = best_for_rank

        for label, pairs in by_label.items():
            # ascending sort followed by a reversal: highest score first
            ranked = sorted(pairs, key=itemgetter(1))
            ranked.reverse()

            nodes, scores = unzip(ranked)
            scores = array(scores)
            top = scores[0]

            best_for_rank.append((label, top))

            shares_top = scores == top
            if sum(shares_top) > 1:
                # several nodes share the best score; where() is the
                # original workaround for a shape mismatch and leaves None
                # at the positions that are not part of the tie
                indices = where(shares_top, range(len(nodes)), None)
                tie_nodes = [None if i is None else nodes[i]
                             for i in indices]
                keeper = tiebreak_f(tie_nodes)

                for contender, _ in ranked:
                    if contender == keeper:
                        continue
                    contender.RankNames[rank_idx] = None
            else:
                # unique best score: every node after the first loses
                for contender, _ in ranked[1:]:
                    contender.RankNames[rank_idx] = None

    return used_scores
Ejemplo n.º 6
0
def name_node_score_fold(tree,
                         score_f=fmeasure,
                         tiebreak_f=min_tips,
                         verbose=False):
    """Score each (rank, name) on internal nodes and keep one node per name

    The tree is walked a single time while a dict of candidate
    ``(node, score)`` pairs is accumulated per rank and name.  The winning
    node for every name is then chosen from that dict, so no further tree
    lookups are needed.

    Parameters
    ----------
    tree
        Object exposing ``non_tips(include_self=True)``.  Each yielded node
        must provide ``RankNames``, ``ValidRelFreq`` and
        ``ConsensusRelFreq``.
    score_f
        Callable invoked as ``score_f(precision, recall)``.
    tiebreak_f
        Callable that receives the candidate list for a tied name (``None``
        in the positions of nodes that are not part of the tie) and returns
        the node to keep.
    verbose
        When true, a start-up message is printed.

    Returns
    -------
    dict
        Maps every rank index to a list of ``(name, best_score)`` tuples.

    Notes
    -----
    Nodes are modified in place: ``RankNameScores`` is reset on every
    visited node, and ``RankNames[rank]`` is set to ``None`` on each node
    that is not selected for a name.
    """
    if verbose:
        print("Starting name_node_score_fold...")

    n_ranks = len(RANK_ORDER)
    candidates = {rank_idx: {} for rank_idx in range(n_ranks)}

    # pass 1: score every named rank of every internal node
    for internal in tree.non_tips(include_self=True):
        internal.RankNameScores = [None] * n_ranks

        for rank_idx, label in enumerate(internal.RankNames):
            if label is not None:
                # precision: share of the informative descending tips that
                # carry this name
                precision = internal.ValidRelFreq[rank_idx][label]

                # recall: tips with this name below the node, relative to
                # all tips in the tree with this name
                recall = internal.ConsensusRelFreq[rank_idx][label]

                # the score is also stored on the node, at the matching
                # rank position, so it can be inspected later
                value = score_f(precision, recall)
                internal.RankNameScores[rank_idx] = value

                per_label = candidates[rank_idx].setdefault(label, [])
                per_label.append((internal, value))

    # pass 2: for each name keep a single node and blank the others
    used_scores = {}
    for rank_idx, by_label in candidates.items():
        best_for_rank = []
        used_scores[rank_idx] = best_for_rank

        for label, pairs in by_label.items():
            # ascending sort followed by a reversal: highest score first
            ranked = sorted(pairs, key=itemgetter(1))
            ranked.reverse()

            nodes, scores = unzip(ranked)
            scores = array(scores)
            top = scores[0]

            best_for_rank.append((label, top))

            shares_top = scores == top
            if sum(shares_top) > 1:
                # several nodes share the best score; where() is the
                # original workaround for a shape mismatch and leaves None
                # at the positions that are not part of the tie
                indices = where(shares_top, range(len(nodes)), None)
                tie_nodes = [None if i is None else nodes[i]
                             for i in indices]
                keeper = tiebreak_f(tie_nodes)

                for contender, _ in ranked:
                    if contender == keeper:
                        continue
                    contender.RankNames[rank_idx] = None
            else:
                # unique best score: every node after the first loses
                for contender, _ in ranked[1:]:
                    contender.RankNames[rank_idx] = None

    return used_scores