def check_tree_subset(fasta_labels, tree_fp):
    """Report fasta labels that are missing from the tips of a tree.

    Each fasta label is reduced to the text before its first underscore
    before it is compared against the tree tip names.

    fasta_labels: list of fasta labels
    tree_fp: tree filepath

    Returns True when every reduced label is a tree tip; otherwise returns
    a list of the reduced labels that are not tree tips.
    """
    # Compare only the portion of each label before the first underscore.
    raw_fasta_labels = set(label.split('_')[0] for label in fasta_labels)

    # Parse inside a context manager so the file handle is always closed,
    # even if parsing raises.
    # NOTE(review): mode "U" is kept from the original; Python 3.11+ rejects
    # it, so switch to "r" if this module is ever run on Python 3.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)

    # Set of tree tip names for fast membership tests.
    tree_tips = set(tree.getTipNames())

    labels_not_in_tips = [label for label in raw_fasta_labels
                          if label not in tree_tips]

    # Callers receive True (not an empty list) when nothing is missing.
    if not labels_not_in_tips:
        return True
    return labels_not_in_tips
def check_tree_exact_match(fasta_labels, tree_fp):
    """Check that fasta labels and tree tips match each other exactly.

    Each fasta label is reduced to the text before its first underscore
    before it is compared against the tree tip names.

    fasta_labels: list of fasta labels
    tree_fp: tree filepath

    Returns a two-element list [labels_not_in_tips, tips_not_in_labels].
    Each element is True when nothing is missing in that direction;
    otherwise it is a list of the offending names.
    """
    # Compare only the portion of each label before the first underscore.
    raw_fasta_labels = set(label.split('_')[0] for label in fasta_labels)

    # Parse inside a context manager so the file handle is always closed,
    # even if parsing raises.
    # NOTE(review): mode "U" is kept from the original; Python 3.11+ rejects
    # it, so switch to "r" if this module is ever run on Python 3.
    with open(tree_fp, "U") as tree_f:
        tree = DndParser(tree_f)

    # Set of tree tip names for fast membership tests.
    tree_tips = set(tree.getTipNames())

    # Reduced fasta labels that have no matching tip.
    labels_not_in_tips = [label for label in raw_fasta_labels
                          if label not in tree_tips]
    # Callers receive True (not an empty list) when nothing is missing.
    if not labels_not_in_tips:
        labels_not_in_tips = True

    # Tips that have no matching reduced fasta label.
    tips_not_in_labels = [tip for tip in tree_tips
                          if tip not in raw_fasta_labels]
    if not tips_not_in_labels:
        tips_not_in_labels = True

    return [labels_not_in_tips, tips_not_in_labels]