def check_tree_subset(fasta_labels, tree_fp):
    """Report fasta labels whose prefix is missing from the tree's tips.

    Each label is reduced to the text before its first underscore, and the
    resulting set of prefixes is compared against the tree's tip names.

    fasta_labels:  list of fasta labels
    tree_fp: tree filepath

    Returns True when every label prefix is a tip name; otherwise returns
    a list of the label prefixes that are not tip names.
    """

    # Compare on the part of each label that precedes the first underscore
    raw_fasta_labels = set(label.split('_')[0] for label in fasta_labels)

    # Parse inside the context manager so the handle is always closed,
    # even if parsing raises.
    # NOTE(review): mode "U" is kept to preserve the original newline
    # handling; Python 3.11+ rejects it, so revisit when porting.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)
        # Get a set of tree tip names
        tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [label for label in raw_fasta_labels
                          if label not in tree_tips]

    # Callers receive True, not an empty list, when nothing is missing
    if not labels_not_in_tips:
        return True

    return labels_not_in_tips
def check_tree_subset(fasta_labels,
                      tree_fp):
    """ Checks that the fasta label prefixes are a subset of the tree tips

    A label's prefix is the text before its first underscore.

    fasta_labels:  list of fasta labels
    tree_fp: tree filepath

    Returns True if all label prefixes are found among the tree tip names,
    otherwise a list of the label prefixes that were not found.
    """

    # Only the text before the first underscore of each label is compared
    raw_fasta_labels = set(label.split('_')[0] for label in fasta_labels)

    # The with-block guarantees the tree file is closed once it has been
    # parsed (the original left the handle open).
    # NOTE(review): "U" mode preserved from the original; it was removed
    # in Python 3.11 - confirm the target interpreter before changing it.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)
        tree_tips = set(tree.getTipNames())

    missing_labels = [curr_label for curr_label in raw_fasta_labels
                      if curr_label not in tree_tips]

    # An empty result is reported as True rather than as an empty list
    if missing_labels:
        return missing_labels

    return True
def check_tree_exact_match(fasta_labels,
                           tree_fp):
    """Checks fasta labels to exact match to tree tips

    Each fasta label is reduced to the text before its first underscore
    before being compared with the tree tip names.

    fasta_labels: list of fasta labels
    tree_fp: tree filepath

    Returns a two-element list [labels_not_in_tips, tips_not_in_labels]:
     labels_not_in_tips: list of label prefixes that are not tip names,
      or True if there are none.
     tips_not_in_labels: list of tip names that are not label prefixes,
      or True if there are none.
    """

    # Only the text before the first underscore of each label is compared
    raw_fasta_labels = set(label.split('_')[0] for label in fasta_labels)

    # Parse inside the context manager so the handle is always closed,
    # even if parsing raises.
    # NOTE(review): mode "U" is kept to preserve the original newline
    # handling; Python 3.11+ rejects it, so revisit when porting.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)
        # Get a set of tree tip names
        tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [curr_label for curr_label in raw_fasta_labels
                          if curr_label not in tree_tips]

    # An empty result in either direction is reported as True
    if not labels_not_in_tips:
        labels_not_in_tips = True

    tips_not_in_labels = [curr_tip for curr_tip in tree_tips
                          if curr_tip not in raw_fasta_labels]

    if not tips_not_in_labels:
        tips_not_in_labels = True

    return [labels_not_in_tips, tips_not_in_labels]
# Example #4
# 0
def check_tree_exact_match(fasta_labels,
                           tree_fp):
    """Compares fasta label prefixes and tree tip names in both directions

    A label's prefix is the text before its first underscore.

    fasta_labels: list of fasta labels
    tree_fp: tree filepath

    Returns a list of two items. The first is the list of label prefixes
     not found among the tree tips; the second is the list of tree tips
     not found among the label prefixes. Either item is True instead of
     an empty list when nothing is missing in that direction.
    """

    # Compare on the part of each label that precedes the first underscore
    label_prefixes = set(label.split('_')[0] for label in fasta_labels)

    # The with-block guarantees the tree file is closed once it has been
    # parsed (the original left the handle open).
    # NOTE(review): "U" mode preserved from the original; it was removed
    # in Python 3.11 - confirm the target interpreter before changing it.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)
        tree_tips = set(tree.getTipNames())

    # Label prefixes absent from the tree
    labels_not_in_tips = [prefix for prefix in label_prefixes
                          if prefix not in tree_tips]

    # Tree tips absent from the label prefixes
    tips_not_in_labels = [tip for tip in tree_tips
                          if tip not in label_prefixes]

    # Empty lists are replaced by True to signal a full match
    return [labels_not_in_tips if labels_not_in_tips else True,
            tips_not_in_labels if tips_not_in_labels else True]