def extract_rand(input_path, suffix, alignment_file):
    """Snapshot a randomly thinned alignment as a new dataset version.

    Creates a new dataset named ``input_path._dataset + suffix`` under the
    same version, then copies the thinned alignment plus the unchanged
    outgroups and duplicates metadata into it.

    :param input_path: source dataset paths object (common.Paths)
    :param suffix: string appended to the source dataset name
    :param alignment_file: path of the randomly thinned alignment to copy
    """
    # Progress messages added for consistency with extract_me / extract_ss.
    print(
        "Extracting alignment generated with the random tree thinning technique..."
    )
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    shutil.copy(alignment_file, output_path.alignment)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    print("New version of the snapshot: " + output_path.path)
def extract_me(input_path, suffix, alignment_file):
    """Snapshot a max-entropy-thinned alignment as a new dataset version.

    Registers a new dataset named ``input_path._dataset + suffix`` under the
    same version, then copies the thinned alignment and the unchanged
    outgroups / duplicates metadata into it.

    :param input_path: source dataset paths object (common.Paths)
    :param suffix: string appended to the source dataset name
    :param alignment_file: path of the max-entropy thinned alignment to copy
    """
    print(
        "Extracting alignment generated with the maximum entropy tree thinning technique..."
    )
    # Destination dataset lives in the same version as the source.
    output_path = common.Paths(
        [input_path._version, input_path._dataset + suffix], 0
    )
    data_versioning.setup_new_dataset(output_path)
    # Copy the alignment plus the metadata files that are unaffected by thinning.
    copies = (
        (alignment_file, output_path.alignment),
        (input_path.outgroups_file, output_path.outgroups_file),
        (input_path.duplicates_json, output_path.duplicates_json),
    )
    for src, dst in copies:
        shutil.copy(src, dst)
    print("New version of the snapshot: " + output_path.path)
def extract_ss(input_path, suffix, tree_file):
    """Snapshot a support-selection-thinned alignment as a new dataset version.

    Reads the thinned tree, keeps only the MSA sequences whose labels are
    leaves of that tree, writes the reduced alignment into a newly created
    dataset named ``input_path._dataset + suffix``, and copies the unchanged
    duplicates / outgroups metadata alongside it.

    NOTE(review): a second, print-augmented ``extract_ss`` appears later in
    this file; if both are in the same module the later definition shadows
    this one — confirm which is intended.

    :param input_path: source dataset paths object (common.Paths)
    :param suffix: string appended to the source dataset name
    :param tree_file: Newick file (format=1) defining the kept leaf set
    """
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label = entry[0]
        sequence = entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)
    # Fix: the original used open(...).write(...) without closing the handle;
    # a context manager guarantees the alignment is flushed and closed.
    with open(output_path.alignment, "w") as out_file:
        out_file.write(new_msa.write(format="fasta"))
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
def setup_new_version(date=None, datasets=None):
    """Create a new versioned snapshot directory and its dataset subfolders.

    :param date: version date string "YYYY-MM-DD". Defaults to today's date
        *at call time* — the original default ``datetime.datetime.now()...``
        was evaluated once at import time, so a long-running process would
        silently keep using a stale date.
    :param datasets: dataset names to create; defaults to the standard four.
        (Was a mutable list default — replaced with a None sentinel.)
    :return: list of common.Paths objects, one per created dataset.
    """
    if date is None:
        date = datetime.datetime.now().strftime("%Y-%m-%d")
    if datasets is None:
        datasets = ["fmsao", "fmsan", "smsao", "smsan"]
    version_id = get_current_version_id(date)
    version = "{}_{}".format(date, version_id)
    # make the base path for the version
    util.make_path(util.versioned_path(version, ""))
    # generate the appropriate paths
    paths = []
    for ds in datasets:
        p = common.Paths([version, ds], 0)
        setup_new_dataset(p)
        paths.append(p)
    print(version)
    return paths
def extract_ss(input_path, suffix, tree_file):
    """Snapshot a support-selection-thinned alignment as a new dataset version.

    Reads the thinned tree, keeps only the MSA sequences whose labels are
    leaves of that tree, writes the reduced alignment into a newly created
    dataset named ``input_path._dataset + suffix``, and copies the unchanged
    duplicates / outgroups metadata alongside it.

    :param input_path: source dataset paths object (common.Paths)
    :param suffix: string appended to the source dataset name
    :param tree_file: Newick file (format=1) defining the kept leaf set
    """
    print(
        "Extracting alignment generated with the support selection tree thinning technique..."
    )
    tree = Tree(tree_file, format=1)
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label = entry[0]
        sequence = entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)
    # Fix: the original used open(...).write(...) without closing the handle;
    # a context manager guarantees the alignment is flushed and closed.
    with open(output_path.alignment, "w") as out_file:
        out_file.write(new_msa.write(format="fasta"))
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    print("New version of the snapshot: " + output_path.path)
#!/usr/bin/env python3
"""Run the thinned-dataset extraction steps for the dataset given on argv."""
import os
import sys

# Make the project's helper modules importable when run from the repo root.
sys.path.insert(0, 'scripts')

import common
import thinned_dataset_extraction

paths = common.Paths(sys.argv)

# Support Selection thinning
thinned_dataset_extraction.extract_ss(paths, "-ss_thinned", paths.ss_mre_thinned_tree)

# Clade compression thinning (currently disabled)
#thinned_dataset_extraction.extract_cc(paths, "-cc_thinned", paths.cc_thinned_alignment)

# Max entropy thinning (currently disabled)
# NOTE(review): this disabled call uses extract_cc — presumably a copy-paste
# error for extract_me; confirm before re-enabling.
#thinned_dataset_extraction.extract_cc(paths, "-me_thinned", paths.me_thinned_alignment)

# Random thinning
thinned_dataset_extraction.extract_rand(paths, "-rand_thinned", paths.rand_thinned_alignment)