Example #1
0
def extract_rand(input_path, suffix, alignment_file):
    """Snapshot an alignment produced by random tree thinning.

    Creates a new versioned dataset named `<input dataset><suffix>`, copies
    the given alignment into it, and carries over the outgroups and
    duplicates metadata from the input dataset.

    Args:
        input_path: source dataset paths object (provides `_version`,
            `_dataset`, `outgroups_file`, `duplicates_json`).
        suffix: string appended to the input dataset name for the output.
        alignment_file: path of the thinned alignment to copy.
    """
    # Consistency fix: the sibling extractors (extract_me, extract_ss)
    # announce the operation and report the resulting snapshot path;
    # this one silently did its work.
    print(
        "Extracting alignment generated with the random tree thinning technique..."
    )
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    shutil.copy(alignment_file, output_path.alignment)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    print("New version of the snapshot: " + output_path.path)
Example #2
0
def extract_me(input_path, suffix, alignment_file):
    """Snapshot an alignment produced by maximum-entropy tree thinning.

    Sets up a new versioned dataset named `<input dataset><suffix>`, copies
    the thinned alignment into it, and carries over the outgroups and
    duplicates metadata from the source dataset.
    """
    print(
        "Extracting alignment generated with the maximum entropy tree thinning technique..."
    )
    target = common.Paths([input_path._version, input_path._dataset + suffix], 0)
    data_versioning.setup_new_dataset(target)
    # Copy the alignment plus the bookkeeping files into the new snapshot.
    copies = (
        (alignment_file, target.alignment),
        (input_path.outgroups_file, target.outgroups_file),
        (input_path.duplicates_json, target.duplicates_json),
    )
    for src, dst in copies:
        shutil.copy(src, dst)
    print("New version of the snapshot: " + target.path)
Example #3
0
def extract_ss(input_path, suffix, tree_file):
    """Snapshot an alignment restricted to the leaves of a thinned tree.

    Reads the support-selection thinned tree, keeps only the sequences of
    the input MSA whose labels occur as leaves of that tree, and writes the
    filtered alignment (plus copied metadata) into a new versioned dataset
    named `<input dataset><suffix>`.

    Args:
        input_path: source dataset paths object (provides `alignment`,
            `_version`, `_dataset`, `duplicates_json`, `outgroups_file`).
        suffix: string appended to the input dataset name for the output.
        tree_file: path of the thinned Newick tree (ete format 1).
    """
    tree = Tree(tree_file, format=1)
    # Set membership gives O(1) lookups while filtering the MSA.
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label = entry[0]
        sequence = entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)
    # BUG FIX: the original `open(...).write(...)` never closed the file
    # handle; use a context manager so the output is flushed and closed.
    with open(output_path.alignment, "w") as out_file:
        out_file.write(new_msa.write(format="fasta"))
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
Example #4
0
def setup_new_version(date=None, datasets=None):
  """Create the directory skeleton for a new versioned snapshot.

  Args:
    date: version date string "YYYY-MM-DD". Defaults to today's date,
      computed at CALL time. (BUG FIX: the original default
      `datetime.datetime.now().strftime(...)` was evaluated once at import
      time, so a long-running process would keep reusing a stale date.
      The mutable list default for `datasets` is replaced the same way.)
    datasets: iterable of dataset names to set up; defaults to the four
      standard datasets.

  Returns:
    A list of common.Paths objects, one per dataset.
  """
  if date is None:
    date = datetime.datetime.now().strftime("%Y-%m-%d")
  if datasets is None:
    datasets = ["fmsao", "fmsan", "smsao", "smsan"]

  version_id = get_current_version_id( date )

  version = "{}_{}".format( date, version_id )

  # make the base path for the version
  util.make_path( util.versioned_path(version, "") )

  # generate the appropriate paths
  paths = []
  for ds in datasets:
    p = common.Paths([version, ds], 0)
    setup_new_dataset(p)
    paths.append(p)

  print(version)

  return paths
Example #5
0
def extract_ss(input_path, suffix, tree_file):
    """Snapshot an alignment restricted to the leaves of a thinned tree.

    Reads the support-selection thinned tree, keeps only the sequences of
    the input MSA whose labels occur as leaves of that tree, and writes the
    filtered alignment (plus copied metadata) into a new versioned dataset
    named `<input dataset><suffix>`, reporting the new snapshot path.

    Args:
        input_path: source dataset paths object (provides `alignment`,
            `_version`, `_dataset`, `duplicates_json`, `outgroups_file`).
        suffix: string appended to the input dataset name for the output.
        tree_file: path of the thinned Newick tree (ete format 1).
    """
    print(
        "Extracting alignment generated with the support selection tree thinning technique..."
    )
    tree = Tree(tree_file, format=1)
    # Set membership gives O(1) lookups while filtering the MSA.
    leaves_set = set(tree.get_leaf_names())
    msa = SeqGroup(input_path.alignment, "fasta")
    path_argv = [input_path._version, input_path._dataset + suffix]
    output_path = common.Paths(path_argv, 0)
    data_versioning.setup_new_dataset(output_path)
    new_msa = SeqGroup()
    for entry in msa.iter_entries():
        label = entry[0]
        sequence = entry[1]
        if label in leaves_set:
            new_msa.set_seq(label, sequence)
    # BUG FIX: the original `open(...).write(...)` never closed the file
    # handle; use a context manager so the output is flushed and closed.
    with open(output_path.alignment, "w") as out_file:
        out_file.write(new_msa.write(format="fasta"))
    shutil.copy(input_path.duplicates_json, output_path.duplicates_json)
    shutil.copy(input_path.outgroups_file, output_path.outgroups_file)
    print("New version of the snapshot: " + output_path.path)
#!/usr/bin/env python3
import os
import sys
sys.path.insert(0, 'scripts')
import common
import thinned_dataset_extraction

# Resolve every dataset path from the command-line arguments.
paths = common.Paths(sys.argv)

# Support Selection thinning
thinned_dataset_extraction.extract_ss(
    paths,
    "-ss_thinned",
    paths.ss_mre_thinned_tree,
)

# Clade compression thinning
#thinned_dataset_extraction.extract_cc(paths, "-cc_thinned", paths.cc_thinned_alignment)

# Max entropy thinning
# NOTE(review): the disabled line below calls extract_cc, not extract_me —
# confirm which extractor is intended before re-enabling it.
#thinned_dataset_extraction.extract_cc(paths, "-me_thinned", paths.me_thinned_alignment)

# Random thinning
thinned_dataset_extraction.extract_rand(
    paths,
    "-rand_thinned",
    paths.rand_thinned_alignment,
)