Python TreeConstruction.ParsimonyScorer Examples

Programming Language: Python

Namespace/Package Name: Bio.Phylo

Class/Type: TreeConstruction

Method/Function: ParsimonyScorer

Examples at hotexamples.com: 2

Python TreeConstruction.ParsimonyScorer - 2 examples found. These are the top-rated real-world Python examples of Bio.Phylo.TreeConstruction.ParsimonyScorer, extracted from open source projects. You can rate the examples to help us improve their quality.

Frequently Used Methods

Show Hide

DistanceTreeConstructor(8)

_DistanceMatrix(6)

DistanceCalculator(3)

ParsimonyScorer(2)

DistanceMatrix(1)

_Matrix(1)

Example #1

Show file

def evaluate_directory(data_dir,
                       eval_limit=50000,
                       lim=5,
                       limited_expand=False):
    '''
    Processes a directory of FASTA files, generating and evaluating trees
    using branch and bound with early stopping and limited expansion (optional)

    The first `lim` entries returned by os.listdir(data_dir) are each read as
    a FASTA alignment, sorted by record id, and handed to get_best_trees
    together with a single shared ParsimonyScorer.
    Args:
        data_dir: str, the path to the data directory which stores fasta files
        eval_limit: int, max number of trees to evaluate for each file. Good
                    setting depends on seq length and time you're willing to wait.
                    Forwarded unchanged to get_best_trees.
        lim: int, number of files in the data directory to process
        limited_expand: bool, whether or not to use limited expansion.
                        Forwarded unchanged to get_best_trees.
    Returns:
        list, element [0] of every result returned by get_best_trees, across
        all processed files. Documented by the author as BioPython Phylo
        trees with tied best scores; get_best_trees is defined elsewhere, so
        the exact result shape should be confirmed there.
    '''
    scorer = TreeConstruction.ParsimonyScorer()
    all_best = []

    # Slice once so the progress counter and the loop share the same total.
    files = os.listdir(data_dir)[:lim]
    total = len(files)

    for i, file in enumerate(files, start=1):
        # Load and sort file
        print(f"Processing {file} ({i}/{total})")
        # The context manager closes the handle even if parsing raises;
        # previously each opened file was left for the garbage collector.
        with open(os.path.join(data_dir, file)) as handle:
            aln = AlignIO.read(handle, 'fasta')
        aln.sort(key=lambda a: a.id)

        result_trees = get_best_trees(aln, scorer, eval_limit, limited_expand)
        print(f"Found {len(result_trees)} trees.")
        all_best.extend(result_trees)

    # Each result is indexable; only its first element is returned.
    return [tr[0] for tr in all_best]

Example #2

Show file

def find_good_tree(trees, data_dir, lim=25):
    '''
    Evaluates a list of trees against multiple alignments, returning the best
    scoring tree across all alignments. Requires tree terminal names and
    alignment names in data_dir are the same.

    Every tree is scored against every loaded alignment. Each score is then
    divided by the highest score seen for that alignment, the normalized
    scores are summed per tree, and the tree with the LOWEST sum is returned.
    Args:
        trees: List[Tree], a list of BioPython Phylo trees to evaluate
        data_dir: str, the path to the data directory
        lim: int, number of alignments to evaluate against (more is slower)
    Returns:
        The element of `trees` with the lowest summed normalized score (the
        earliest such tree on ties), or None when scoring raises for any
        tree/alignment pair or when no alignment produced a positive maximum
        score (including empty `trees` or an empty directory).
    '''
    scorer = TreeConstruction.ParsimonyScorer()

    alns = []
    for file in os.listdir(data_dir)[:lim]:
        # Close the handle as soon as the alignment has been parsed.
        with open(os.path.join(data_dir, file)) as handle:
            aln = AlignIO.read(handle, 'fasta')
        # Record names/ids are cut to their first 5 characters; presumably
        # this is what makes them match the tree terminal names -- confirm
        # against how the trees were built.
        for rec in aln:
            rec.name = rec.id = rec.name[:5]
        alns.append(aln)
    scores = {}
    max_aln = {}

    # Score trees and track highest score for each alignment to normalize later
    print(f"Processing {len(trees)} trees...")
    for i, tree in enumerate(trees):
        print(f"\t{i+1}/{len(trees)}")
        # `alns` is already limited to `lim` entries, so it is not re-sliced
        # here (re-slicing dropped an extra alignment for negative `lim`).
        for j, aln in enumerate(alns):
            try:
                # Score a deep copy so the caller's tree object is never
                # handed to the scorer (NOTE(review): presumably because
                # get_score may modify its argument -- confirm in Biopython).
                sco = scorer.get_score(copy.deepcopy(tree), aln)
            except Exception as e:
                print(e)
                print(
                    "Scoring failed. Did you ensure that terminal names and alignment names match?"
                )
                return None

            scores[(i, j)] = sco
            if sco > max_aln.get(j, 0):
                max_aln[j] = sco

    # Computes normalized scores for each tree
    fin_scores = {}
    for i in range(len(trees)):
        for j in range(len(alns)):
            m_aln = max_aln.get(j, 0)
            # An alignment whose best score is not positive cannot be used
            # as a divisor, so it contributes nothing to any tree.
            if m_aln <= 0:
                continue
            normalized_score = scores.get((i, j), 0) / m_aln
            if not normalized_score:
                print(f"Error for tree {i} and alignment {j}.")
            fin_scores[i] = fin_scores.get(i, 0) + normalized_score

    # Finds final best tree
    if not fin_scores:
        print("No best tree found.")
        return None

    # min() returns the first key with the smallest total, matching the
    # strict "<" comparison of a manual scan over insertion order.
    return trees[min(fin_scores, key=fin_scores.get)]