Esempio n. 1
0
def create_tree_parsimony_impl(msa):
    """Build a parsimony tree for ``msa``, save it, and return the image path.

    A neighbour-joining tree computed from 'identity' distances is used as the
    starting tree of a parsimony search. The final tree is written in Nexus
    format under a randomly numbered file name and its drawing is saved as a
    PNG.

    :param msa: alignment handed to both tree constructors.
    :return: path of the PNG file the drawing was saved to.
    """
    image_path = "../../data/created/createdTree_parsimony.png"

    # Starting tree: neighbour joining on identity distances.
    nj_builder = DistanceTreeConstructor(
        distance_calculator=DistanceCalculator('identity'), method='nj')
    seed_tree = nj_builder.build_tree(msa)

    # Parsimony search that starts from the NJ tree.
    tree_search = NNITreeSearcher(ParsimonyScorer())
    parsimony_builder = ParsimonyTreeConstructor(
        searcher=tree_search, starting_tree=seed_tree)
    tree = parsimony_builder.build_tree(msa)

    nexus_path = ("../../data/created/tree"
                  + str(random.randint(0, 10000000)) + ".nex")
    Phylo.write(tree, nexus_path, "nexus")

    # Draw without opening a window, then persist the current figure.
    Phylo.draw(tree, do_show=False)
    plt.savefig(image_path)
    return image_path
Esempio n. 2
0
def consensus(msa):
    """Print a neighbour-joining tree built from ``msa``.

    ``msa`` is wrapped in a ``MultipleSeqAlignment`` and a tree is built with
    the 'identity' distance model and the 'nj' method.

    :param msa: value accepted by ``MultipleSeqAlignment(...)``.
    """
    alignment = MultipleSeqAlignment(msa)
    # build_tree() obtains the distance matrix from the calculator itself, so
    # the separate (and unused) get_distance() call was dropped.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    tree = constructor.build_tree(alignment)
    # Single-argument call form prints the same on Python 2 and Python 3.
    print(tree)
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        """Load the reference alignment and compute its BLOSUM62 distances."""
        # Context manager so the alignment file is closed after parsing.
        with open('TreeConstruction/msa.phy') as handle:
            self.aln = AlignIO.read(handle, 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_newick_matches(self, tree, ref_path):
        """Assert ``tree`` serialises to the first line of ``ref_path``."""
        self.assertIsInstance(tree, BaseTree.Tree)
        tree_file = StringIO.StringIO()
        Phylo.write(tree, tree_file, 'newick')
        # The reference file is closed even when the comparison fails.
        with open(ref_path) as ref_tree:
            self.assertEqual(tree_file.getvalue(), ref_tree.readline())

    def test_upgma(self):
        """UPGMA on the distance matrix reproduces the stored tree."""
        tree = self.constructor.upgma(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        """NJ on the distance matrix reproduces the stored tree."""
        tree = self.constructor.nj(self.dm)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        """build_tree() on the alignment reproduces the stored NJ tree."""
        tree = self.constructor.build_tree(self.aln)
        self._assert_newick_matches(tree, './TreeConstruction/nj.tre')
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        """Load the reference alignment and compute its BLOSUM62 distances."""
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _assert_same_topology(self, tree, ref_path):
        """Assert ``tree`` is a Tree with the topology stored in ``ref_path``."""
        # assertIsInstance reports the offending type when the check fails.
        self.assertIsInstance(tree, BaseTree.Tree)
        ref_tree = Phylo.read(ref_path, 'newick')
        self.assertTrue(Consensus._equal_topology(tree, ref_tree))

    def test_upgma(self):
        """UPGMA on the distance matrix matches the reference topology."""
        tree = self.constructor.upgma(self.dm)
        self._assert_same_topology(tree, './TreeConstruction/upgma.tre')

    def test_nj(self):
        """NJ on the distance matrix matches the reference topology."""
        tree = self.constructor.nj(self.dm)
        self._assert_same_topology(tree, './TreeConstruction/nj.tre')

    def test_built_tree(self):
        """build_tree() on the alignment matches the reference NJ topology."""
        tree = self.constructor.build_tree(self.aln)
        self._assert_same_topology(tree, './TreeConstruction/nj.tre')
Esempio n. 5
0
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor"""

    def setUp(self):
        """Read the PHYLIP alignment and prepare the constructor under test."""
        self.aln = AlignIO.read('TreeConstruction/msa.phy', 'phylip')
        calculator = DistanceCalculator('blosum62')
        self.dm = calculator.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(calculator)

    def _check_against_reference(self, tree, reference_file):
        """Check the type of ``tree`` and compare its topology to a newick file."""
        # assertIsInstance gives a more informative failure than
        # assertTrue(isinstance(...)).
        self.assertIsInstance(tree, BaseTree.Tree)
        expected = Phylo.read(reference_file, 'newick')
        self.assertTrue(Consensus._equal_topology(tree, expected))

    def test_upgma(self):
        """The UPGMA tree has the reference topology."""
        self._check_against_reference(
            self.constructor.upgma(self.dm), './TreeConstruction/upgma.tre')

    def test_nj(self):
        """The NJ tree has the reference topology."""
        self._check_against_reference(
            self.constructor.nj(self.dm), './TreeConstruction/nj.tre')

    def test_built_tree(self):
        """The tree built directly from the alignment has the NJ topology."""
        self._check_against_reference(
            self.constructor.build_tree(self.aln), './TreeConstruction/nj.tre')
Esempio n. 6
0
def consensus(msa):
    """Build a neighbour-joining tree from ``msa`` and print it.

    :param msa: value accepted by ``MultipleSeqAlignment(...)``; distances
        are computed with the 'identity' model.
    """
    alignment = MultipleSeqAlignment(msa)
    calculator = DistanceCalculator('identity')
    # The constructor asks the calculator for the distance matrix inside
    # build_tree(), so the extra unused get_distance() call was removed.
    tree = DistanceTreeConstructor(calculator, 'nj').build_tree(alignment)
    # print(...) with one argument behaves the same on Python 2 and 3.
    print(tree)
Esempio n. 7
0
def fastaToNJTree(fastaFile, outputFile):
    """Build a neighbour-joining tree from a FASTA alignment and save it.

    :param fastaFile: FASTA alignment passed to ``AlignIO.read``.
    :param outputFile: destination passed to ``Phylo.write``; the tree is
        written in newick format.
    """
    aln = AlignIO.read(fastaFile, 'fasta')
    # build_tree() computes the identity distances itself; the previous
    # stand-alone get_distance() result was never used.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    tree = constructor.build_tree(aln)
    Phylo.write(tree, outputFile, 'newick')
Esempio n. 8
0
def main():
    """Draw a ladderized UPGMA tree for the alignment in ``protein.fasta``."""
    # Context manager so the FASTA handle is closed once parsing is done.
    with open("protein.fasta") as handle:
        alignment = AlignIO.read(handle, "fasta")
    # build_tree() derives the identity distance matrix itself, so it is not
    # computed separately here.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'),
                                          'upgma')
    tree = constructor.build_tree(alignment)
    tree.ladderize()
    Phylo.draw(tree)
Esempio n. 9
0
def create_tree_distance_impl(msa, algorithm):
    """Build a distance tree for ``msa``, save it, and return the image path.

    The tree is built from 'identity' distances with the given construction
    method, written in Nexus format under a randomly numbered file name, and
    drawn to a PNG whose name contains ``algorithm``.

    :param msa: alignment handed to the tree constructor.
    :param algorithm: construction method name; also embedded in the PNG name.
    :return: path of the PNG file the drawing was saved to.
    """
    builder = DistanceTreeConstructor(
        distance_calculator=DistanceCalculator('identity'), method=algorithm)
    tree = builder.build_tree(msa)

    nexus_path = ("../../data/created/tree"
                  + str(random.randint(0, 10000000)) + ".nex")
    Phylo.write(tree, nexus_path, "nexus")

    # Draw without opening a window, then persist the current figure.
    Phylo.draw(tree, do_show=False)
    image_path = "../../data/created/createdTree" + algorithm + ".png"
    plt.savefig(image_path)
    return image_path
Esempio n. 10
0
 def tree(self):
     """Build a midpoint-rooted tree from ``self.alignment``.

     The distance model and construction algorithm come from
     ``self._distance_model`` and ``self._tree_algorithm``. The root clade
     of the returned tree is named 'Root'.
     """
     builder = DistanceTreeConstructor(
         DistanceCalculator(self._distance_model), self._tree_algorithm)
     rooted = builder.build_tree(self.alignment)
     # Root the tree at its midpoint and label the root clade.
     rooted.root_at_midpoint()
     rooted.root.name = 'Root'
     return rooted
Esempio n. 11
0
File: phylo.py Progetto: xzy3/QuaSim
def build_phylogenetic_tree(seqs):
    """Return a distance tree for ``seqs``.

    The distance model and the construction method are taken from the
    module-level ``DISTANCE_TYPE`` and ``TREE_CONSTRUCTION_ALGORITHM``.

    :param seqs: alignment handed to ``DistanceTreeConstructor.build_tree``.
    :return: the tree produced by the constructor.
    """
    distance_model = DistanceCalculator(DISTANCE_TYPE)
    builder = DistanceTreeConstructor(distance_model,
                                      TREE_CONSTRUCTION_ALGORITHM)
    return builder.build_tree(seqs)
Esempio n. 12
0
def NJ_tree(infile, file_type):
    """Build a neighbour-joining tree and save it as newick text and PNG.

    The alignment is read from the save directory identified by the
    module-level ``dirName``. The newick file is written to a path relative
    to the working directory, while the PNG path is prefixed with the
    module-level ``current_path``.

    :param infile: alignment file name inside the save directory.
    :param file_type: alignment format name for ``AlignIO.read`` (for
        example 'clustal' or 'fasta').
    """
    alignment_path = "static/data/sauvegardes/" + dirName + infile
    alignment = AlignIO.read(alignment_path, file_type)

    # 'nj' selects neighbour joining (the original note lists UPGMA as the
    # alternative).
    builder = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    nj_tree = builder.build_tree(alignment)
    nj_tree.ladderize()

    # Draw first (no window), write the newick file, then save the figure.
    Phylo.draw(nj_tree, do_show=False)
    newick_path = 'static/data/sauvegardes/' + dirName + 'tree.txt'
    Phylo.write(nj_tree, newick_path, "newick")
    image_path = current_path + "/static/data/sauvegardes/" + dirName + 'tree.png'
    plt.savefig(image_path)
Esempio n. 13
0
def blosumnj(filename):
    """Print the alignment, its BLOSUM62 distance matrix and the NJ tree.

    :param filename: path of a FASTA alignment file.
    """
    # Kept as a function-local import (as before) so the function does not
    # depend on a module-level import; moved to the top for visibility.
    from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

    # Context manager so the FASTA handle is closed after parsing.
    with open(filename) as handle:
        aln = AlignIO.read(handle, 'fasta')
    print(aln)

    calculator = DistanceCalculator('blosum62')
    dm = calculator.get_distance(aln)
    print(dm)

    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = constructor.build_tree(aln)
    print(tree)
Esempio n. 14
0
    def tree(aln_item):
        """Draw a phylogenetic tree for the last alignment in ``aln_item``.

        Each element of ``aln_item`` must provide ``data(35)`` returning the
        path of a Clustal alignment file. As before, only the last path is
        read. The earlier ones are no longer opened, because doing so only
        leaked their file handles.

        :param aln_item: iterable of items exposing ``data(35)``.
        :raises ValueError: if ``aln_item`` yields no paths (previously this
            surfaced as an unbound-name error on ``handle``).
        """
        paths = [item.data(35) for item in aln_item]
        if not paths:
            raise ValueError("aln_item contains no alignment paths")

        # Context manager so the alignment file is closed after parsing.
        with open(paths[-1]) as handle:
            alignment = AlignIO.read(handle, "clustal")

        # Distances use the 'identity' model; build_tree() computes them via
        # the calculator, so no separate get_distance() call is needed.
        constructor = DistanceTreeConstructor(DistanceCalculator('identity'))
        # NOTE(review): no method is passed, so the constructor's default is
        # used (neighbour joining per the Biopython docs), not UPGMA as the
        # old variable name suggested. Confirm which one is intended.
        phylo_tree = constructor.build_tree(alignment)
        Phylo.draw(phylo_tree)  # plot the phylogenetic tree
Esempio n. 15
0
    def __init__(self, proteina=""):
        """Store ``proteina`` and draw the tree for ``./arbol.aln``.

        :param proteina: value kept on the instance as ``self.proteina``; it
            is not otherwise used here.
        """
        self.proteina = proteina

        with open("./arbol.aln", "r") as aln:
            alignment = AlignIO.read(aln, "clustal")

        # build_tree() obtains the identity distances from the calculator, so
        # the previously unused get_distance() call was removed.
        constructor = DistanceTreeConstructor(DistanceCalculator('identity'))
        # NOTE(review): no method is passed, so the constructor's default is
        # used (neighbour joining per the Biopython docs), not UPGMA as the
        # old variable name suggested. Confirm which one is intended.
        tree = constructor.build_tree(alignment)
        Phylo.draw(tree)
Esempio n. 16
0
def createFasta(inputList):
    """Write up to 100 genomes to a FASTA file and draw their NJ tree.

    The first 100 entries of ``inputList`` (fewer if it is shorter) are
    written to ``phylogenetics.fasta``. That file is read back as an
    alignment, a neighbour-joining tree is built from identity distances, and
    the tree is drawn as ASCII art.

    :param inputList: sliceable sequence of objects with ``name`` and
        ``dnaGenome`` string attributes.
    :return: whatever ``Phylo.draw_ascii`` returns.
    """
    # `with` closes the file even if a record lacks an attribute. Slicing
    # avoids the IndexError the fixed range(100) raised on shorter inputs.
    with open("phylogenetics.fasta", 'w') as fasta:
        for record in inputList[:100]:
            fasta.write(">" + record.name + '\n')
            fasta.write(record.dnaGenome + '\n')

    aln = AlignIO.read('phylogenetics.fasta', 'fasta')
    # build_tree() computes the distance matrix itself, so it is not computed
    # separately here.
    constructor = DistanceTreeConstructor(DistanceCalculator('identity'), 'nj')
    tree = constructor.build_tree(aln)
    # NOTE(review): draw_ascii appears to print the drawing rather than
    # return it, so this value is presumably None. Confirm what callers expect.
    drawing = Phylo.draw_ascii(tree)
    return drawing
class DistanceTreeConstructorTest(unittest.TestCase):
    """Test DistanceTreeConstructor."""

    def setUp(self):
        """Read the PHYLIP alignment and prepare the constructor under test."""
        self.aln = AlignIO.read("TreeConstruction/msa.phy", "phylip")
        blosum = DistanceCalculator("blosum62")
        self.dm = blosum.get_distance(self.aln)
        self.constructor = DistanceTreeConstructor(blosum)

    def test_upgma(self):
        """The UPGMA tree has the topology stored in upgma.tre."""
        built = self.constructor.upgma(self.dm)
        self.assertIsInstance(built, BaseTree.Tree)
        expected = Phylo.read("./TreeConstruction/upgma.tre", "newick")
        self.assertTrue(Consensus._equal_topology(built, expected))

    def test_nj(self):
        """The NJ tree matches nj.tre, and a two-taxon matrix matches nj_min.tre."""
        built = self.constructor.nj(self.dm)
        self.assertIsInstance(built, BaseTree.Tree)
        expected = Phylo.read("./TreeConstruction/nj.tre", "newick")
        self.assertTrue(Consensus._equal_topology(built, expected))

        # Shrink a fresh distance matrix to two entries by repeatedly
        # deleting the last one (addressed by its positive index).
        self.min_dm = DistanceCalculator("blosum62").get_distance(self.aln)
        while len(self.min_dm) > 2:
            del self.min_dm[len(self.min_dm) - 1]

        built_min = self.constructor.nj(self.min_dm)
        self.assertIsInstance(built_min, BaseTree.Tree)

        expected_min = Phylo.read("./TreeConstruction/nj_min.tre", "newick")
        self.assertTrue(Consensus._equal_topology(built_min, expected_min))

    def test_built_tree(self):
        """The tree built directly from the alignment matches nj.tre."""
        built = self.constructor.build_tree(self.aln)
        self.assertIsInstance(built, BaseTree.Tree)
        expected = Phylo.read("./TreeConstruction/nj.tre", "newick")
        self.assertTrue(Consensus._equal_topology(built, expected))
Esempio n. 18
0
def makePhyloTree(alignedfile, outfile, matrixfile):
    """Build a tree from a Clustal alignment and save the tree and its matrix.

    The alignment type and the identity distance matrix are printed, the
    matrix is written as text to ``matrixfile``, and the tree (marked as
    rooted) is written to ``outfile`` in phyloXML format.

    :param alignedfile: path of the Clustal alignment to read.
    :param outfile: destination passed to ``Phylo.write``.
    :param matrixfile: path the distance matrix text is written to.
    :return: the constructed tree.
    """
    # The handle is only needed while parsing. `with` closes it, so the old
    # explicit a.close() inside the block was redundant.
    with open(alignedfile, 'r') as a:
        align = AlignIO.read(a, 'clustal')
    print(type(align))

    calculator = DistanceCalculator('identity')
    dist_matrix = calculator.get_distance(align)
    print(dist_matrix)
    # Context manager so the matrix file is closed even if the write fails.
    with open(matrixfile, "w") as f:
        f.write(str(dist_matrix))

    phylotree = DistanceTreeConstructor(calculator).build_tree(align)
    phylotree.rooted = True
    Phylo.write(phylotree, outfile, "phyloxml")
    return phylotree
Esempio n. 19
0
def dendroNJ(inFile, model='identity', bootstrap=True, replicate=100):
    """
    Return a Neighbor Joining tree, in newick format, for a FASTA alignment.

    Names required in scope:
    - AlignIO (from Bio)
    - DistanceCalculator (from Bio.Phylo.TreeConstruction)
    - DistanceTreeConstructor (from Bio.Phylo.TreeConstruction)
    - bootstrap_consensus and majority_consensus (from Bio.Phylo.Consensus)

    Arguments:
    - inFile: FASTA alignment handed to AlignIO.read
    - model: distance model name (default 'identity')
    - bootstrap: when true, return the majority consensus of bootstrap
      replicates instead of a single tree (default True)
    - replicate: number of bootstrap replicates, converted with int()
      (default 100)
    """
    alignment = AlignIO.read(inFile, 'fasta')
    nj_builder = DistanceTreeConstructor(DistanceCalculator(model), 'nj')
    if not bootstrap:
        return nj_builder.build_tree(alignment).format('newick')
    consensus_tree = bootstrap_consensus(
        alignment, int(replicate), nj_builder, majority_consensus)
    return consensus_tree.format('newick')
Esempio n. 20
0
    def generate(self):
        """Align the selected FASTA files with MUSCLE and draw their tree.

        Each entry of ``self.listaB.lista`` is expected to have text of the
        form ``<dir>/<file>``, which maps to ``./FASTAS/<dir>/<file>``. The
        paths are recorded in ``./output.txt``, the files are appended to
        ``all.fasta``, ``muscle`` writes ``tree.aln``, and the resulting tree
        is drawn.

        :raises IOError: if one of the selected FASTA files cannot be read
            (previously ``cat`` printed an error and the run continued with
            an incomplete ``all.fasta``).
        """
        import shutil

        paths = []
        for i in range(self.listaB.lista.count()):
            itemList = self.listaB.lista.item(i).text().split('/')
            paths.append("./FASTAS/{}/{}".format(itemList[0], itemList[1]))

        # One path per line. Without separators, two or more selections were
        # glued together into a single non-existent path.
        with open("./output.txt", 'w') as listing:
            for path in paths:
                listing.write(path + "\n")

        # Build the combined FASTA to align. Copying in Python avoids a
        # shell command assembled from item text, so paths need no escaping.
        # NOTE(review): append mode is kept from the original `>>`, so
        # all.fasta grows across calls. Confirm that is intended.
        with open("all.fasta", "ab") as merged:
            for path in paths:
                with open(path, "rb") as source:
                    shutil.copyfileobj(source, merged)

        # Run MUSCLE to produce a Clustal alignment.
        os.system('muscle -in all.fasta -clwout tree.aln ')
        with open("./tree.aln", "r") as aln:
            algn = AlignIO.read(aln, "clustal")

        # build_tree() computes the identity distances itself.
        constructor = DistanceTreeConstructor(DistanceCalculator('identity'))
        # NOTE(review): no method is passed, so the constructor's default is
        # used (neighbour joining per the Biopython docs), not UPGMA as the
        # old variable name suggested. Confirm which one is intended.
        tree = constructor.build_tree(algn)
        Phylo.draw(tree)
Esempio n. 21
0
    from Bio import AlignIO
    pocketAlignment = AlignIO.read(open(pocketAlignmentFile), "fasta")
    print("Calculating Distance Matrix")
    for substMat in substMatrices:
        try:
            alnFile = pocketAlignmentFile.split("/")[-1]
            fname = "autotree/" + substMat + "_" + alnFile + ".pxml"
            print(fname)
            if (os.path.exists(fname)):
                tree = Phylo.read(fname, "phyloxml")
            else:
                calculator = DistanceCalculator(substMat)
                dm = calculator.get_distance(pocketAlignment)
                print("Building tree")
                constructor = DistanceTreeConstructor(calculator, 'nj')
                tree = constructor.build_tree(pocketAlignment)
                Phylo.write(tree, fname, "phyloxml")

            evaluateTree(tree, substMat)

        except Exception as err:
            print("Error for ", substMat)
            print(err)
            pass
else:
    print("Reading tree")
    tree = Phylo.read(treeFile, "newick")
    evaluateTree(tree)

#peters-macbook-pro:alignments peter$ python 04_printEnrichment.py trees/2xb7_manning_blossum90_manning.fasta 35 70 data/hms_lincs/activities/hms_20052.csv  "% Control" trees/2xb7_manning_blossum90_manning.nwk
Esempio n. 22
0
# Debug aid: inspect the type of the alignment loaded earlier in the script.
#print(type(alignment))

# Create an identity-based distance calculator and compute the distance
# matrix of `alignment` (assumed to be defined above this excerpt).

from Bio.Phylo.TreeConstruction import DistanceCalculator

calculator = DistanceCalculator('identity')
distance_matrix = calculator.get_distance(alignment)
#print(distance_matrix)

# Create the tree constructor (no method given, so its default is used) and
# build the tree from the same alignment.

from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

constructor = DistanceTreeConstructor(calculator)
shark_tree = constructor.build_tree(alignment)
# Mark the tree as rooted before saving it as phyloXML.
shark_tree.rooted = True
#print(shark_tree)
Phylo.write(shark_tree, "shark_tree.xml", "phyloxml")

# Create the tree figure: first as ASCII art, then with matplotlib.

Phylo.draw_ascii(shark_tree)

fig = plt.figure(figsize=(13, 5), dpi=100)  # create figure & set the size
matplotlib.rc('font', size=12)  # fontsize of the leaf and node labels
matplotlib.rc('xtick', labelsize=10)  # fontsize of the tick labels
matplotlib.rc('ytick', labelsize=10)  # fontsize of the tick labels
# shark_tree.ladderize()  # optional: re-order the tree
axes = fig.add_subplot(1, 1, 1)
# Draw into the axes created above so the figure size set here is used.
Phylo.draw(shark_tree, axes=axes)
Esempio n. 23
0
# Close the handles opened earlier in the script (outside this excerpt).
input_handle.close()
output_handle.close()


# Convert the FASTA alignment to PHYLIP format for the steps below.
from Bio import AlignIO
AlignIO.convert("BRCA2_family_fixed.fasta", "fasta", "BRCA2_family.phy", "phylip")

# Read the converted PHYLIP alignment.
aln = AlignIO.read('BRCA2_family.phy', 'phylip')

# Create a starting tree with neighbour joining on identity distances.
calculator = DistanceCalculator('identity')
# NOTE(review): `dm` is not used in the visible part of the script.
dm = calculator.get_distance(aln)
constructor = DistanceTreeConstructor(calculator, 'nj')
starting_tree = constructor.build_tree(aln)


# Substitution cost matrix from the in-lecture exercise, in lower-triangular
# form: a penalty of 2 for a transversion or a gap, 1 for a transition.
# Rows and columns follow the order A, C, G, T, '-'.
cost_matrix = [[0],
            [2,0],
            [1,2,0],
            [2,1,2,0],
            [2,2,2,2,0]]

# Wrap the costs in a DistanceMatrix labelled with the four nucleotides and
# the gap symbol so they can be handed to the scorer.
weight = DistanceMatrix(names=['A', 'C', 'G', 'T','-'], matrix=cost_matrix)

# ParsimonyScorer will use the Sankoff algorithm when provided with a matrix argument.
scorer = ParsimonyScorer(matrix=weight)
Esempio n. 24
0

        infile = ''.join(name.split(".")[:-1]) + 'infile.txt'
        fn = ''.join(name.split(".")[:-1]) + 'phylo.txt'
        write_leaves_to_charmat(target_nodes_original_network_uniq, fn)

        os.system("python2 /home/mattjones/projects/scLineages/SingleCellLineageTracing/scripts/binarize_multistate_charmat.py " + fn + " " + infile)
        aln = AlignIO.read(infile, "phylip")

        aln = unique_alignments(aln)

        calculator = DistanceCalculator('identity', skip_letters='?')
        constructor = DistanceTreeConstructor(calculator, 'nj')

        t0 = time.time()
        tree = constructor.build_tree(aln)
	t1 = time.time()

        out = stem + "_nj.txt"
        print(out)
        Phylo.write(tree, out, 'newick')

        nj_net = newick_to_network(out)

        #newick = convert_network_to_newick_format(nj_net)
        #with open(out, "w") as f:
        #    f.write(newick)
    
        # old code for using Phylo to parse newick files to networkx objects
        #nj_net = Phylo.to_networkx(tree)
Esempio n. 25
0
def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file deterministically."""
    with open(path, "wb") as handle:
        pic.dump(obj, handle)


def main():
    """
    Reconstruct a lineage tree from a pickled network with one algorithm.

    The positional argument is a network file read with ``nx.read_gpickle``.
    Exactly one algorithm flag selects the reconstruction method (greedy,
    hybrid, ilp, neighbor-joining, weighted neighbor-joining or
    camin-sokal). The result is pickled to ``--out_fp`` or, when that is
    omitted, to the input file name with "true" replaced by the method tag.

    Raises ``Exception`` when no algorithm flag is given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("netfp", type=str, help="character_matrix")
    parser.add_argument("-nj",
                        "--neighbor-joining",
                        action="store_true",
                        default=False)
    parser.add_argument("--neighbor_joining_weighted",
                        action="store_true",
                        default=False)
    parser.add_argument("--ilp", action="store_true", default=False)
    parser.add_argument("--hybrid", action="store_true", default=False)
    parser.add_argument("--cutoff",
                        type=int,
                        default=80,
                        help="Cutoff for ILP during Hybrid algorithm")
    parser.add_argument(
        "--hybrid_lca_mode",
        action="store_true",
        help=
        "Use LCA distances to transition in hybrid mode, instead of number of cells",
    )
    parser.add_argument("--time_limit",
                        type=int,
                        default=-1,
                        help="Time limit for ILP convergence")
    parser.add_argument(
        "--iter_limit",
        type=int,
        default=-1,
        help="Max number of iterations for ILP solver",
    )
    parser.add_argument("--greedy", "-g", action="store_true", default=False)
    parser.add_argument("--camin-sokal",
                        "-cs",
                        action="store_true",
                        default=False)
    parser.add_argument("--verbose",
                        action="store_true",
                        default=False,
                        help="output verbosity")
    parser.add_argument("--mutation_map", type=str, default="")
    parser.add_argument("--num_threads", type=int, default=1)
    parser.add_argument("--no_triplets", action="store_true", default=False)
    # Parsed as int so a value given on the command line has the same type
    # as the integer default (it was previously parsed as str).
    parser.add_argument("--max_neighborhood_size", type=int, default=3000)
    parser.add_argument("--out_fp",
                        type=str,
                        default=None,
                        help="optional output file")
    parser.add_argument("--seed",
                        type=int,
                        default=None,
                        help="Random seed for ILP solver")

    args = parser.parse_args()

    netfp = args.netfp
    outfp = args.out_fp
    verbose = args.verbose

    # `cutoff` is used by the hybrid/ilp branches below. It was never
    # assigned before, which raised NameError there.
    cutoff = args.cutoff
    lca_mode = args.hybrid_lca_mode
    if lca_mode:
        lca_cutoff = cutoff
        cell_cutoff = None
    else:
        cell_cutoff = cutoff
        lca_cutoff = None
    time_limit = args.time_limit
    iter_limit = args.iter_limit
    num_threads = args.num_threads
    max_neighborhood_size = args.max_neighborhood_size
    seed = args.seed

    if seed is not None:
        random.seed(seed)
        np.random.seed(seed)

    # NOTE(review): score_triplets is computed but not used in this function.
    score_triplets = not args.no_triplets

    prior_probs = None
    if args.mutation_map != "":
        # NOTE: unpickling can execute arbitrary code, so only pass trusted
        # files here.
        with open(args.mutation_map, "rb") as handle:
            prior_probs = pic.load(handle)

    name = netfp.split("/")[-1]
    stem = ".".join(name.split(".")[:-1])

    true_network = nx.read_gpickle(netfp)

    if isinstance(true_network, Cassiopeia_Tree):
        true_network = true_network.get_network()

    target_nodes = get_leaves_of_tree(true_network)

    # Keep the first leaf seen for every distinct character string.
    target_nodes_uniq = []
    seen_charstrings = []
    for t in target_nodes:
        if t.char_string not in seen_charstrings:
            seen_charstrings.append(t.char_string)
            target_nodes_uniq.append(t)

    if args.greedy:

        if verbose:
            print("Running Greedy Algorithm on " +
                  str(len(target_nodes_uniq)) + " Cells")

        reconstructed_network_greedy = solve_lineage_instance(
            target_nodes_uniq,
            method="greedy",
            prior_probabilities=prior_probs)

        net = reconstructed_network_greedy[0]

        if outfp is None:
            outfp = name.replace("true", "greedy")
        _dump_pickle(net, outfp)

    elif args.hybrid:

        if verbose:
            print("Running Hybrid Algorithm on " +
                  str(len(target_nodes_uniq)) + " Cells")
            print("Parameters: ILP on sets of " + str(cutoff) + " cells " +
                  str(time_limit) + "s to complete optimization")

        reconstructed_network_hybrid = solve_lineage_instance(
            target_nodes_uniq,
            method="hybrid",
            hybrid_cell_cutoff=cell_cutoff,
            hybrid_lca_cutoff=lca_cutoff,
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            threads=num_threads,
            max_neighborhood_size=max_neighborhood_size,
            seed=seed,
            num_iter=iter_limit,
        )

        net = reconstructed_network_hybrid[0]

        if outfp is None:
            outfp = name.replace("true", "hybrid")
        _dump_pickle(net, outfp)

    elif args.ilp:

        if verbose:
            # This branch runs the ILP solver. The message used to say
            # "Hybrid", copied from the branch above.
            print("Running ILP Algorithm on " +
                  str(len(target_nodes_uniq)) + " Cells")
            print("Parameters: ILP on sets of " + str(cutoff) + " cells " +
                  str(time_limit) + "s to complete optimization")

        reconstructed_network_ilp = solve_lineage_instance(
            target_nodes_uniq,
            method="ilp",
            hybrid_subset_cutoff=cutoff,
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            max_neighborhood_size=max_neighborhood_size,
            seed=seed,
            num_iter=iter_limit,
        )

        net = reconstructed_network_ilp[0]
        if outfp is None:
            outfp = name.replace("true", "ilp")
        _dump_pickle(net, outfp)

    elif args.neighbor_joining:

        if verbose:
            print("Running Neighbor-Joining on " +
                  str(len(target_nodes_uniq)) + " Unique Cells")

        infile = "".join(name.split(".")[:-1]) + "infile.txt"
        fn = "".join(name.split(".")[:-1]) + "phylo.txt"
        write_leaves_to_charmat(target_nodes_uniq, fn)

        # Binarize the character matrix with the helper script and wait for
        # it to finish before reading its output.
        script = SCLT_PATH / "TreeSolver" / "binarize_multistate_charmat.py"
        cmd = "python3.6 " + str(
            script) + " " + fn + " " + infile + " --relaxed"
        p = subprocess.Popen(cmd, shell=True)
        os.waitpid(p.pid, 0)

        aln = AlignIO.read(infile, "phylip-relaxed")

        aln = unique_alignments(aln)

        calculator = DistanceCalculator("identity", skip_letters="?")
        constructor = DistanceTreeConstructor(calculator, "nj")

        tree = constructor.build_tree(aln)

        tree.root_at_midpoint()

        nj_net = Phylo.to_networkx(tree)

        # Replace the Bio.Phylo clades with Node objects; unnamed (internal)
        # clades get the placeholder name "state-node".
        rndict = {}
        for n in nj_net:

            if n.name is None:
                rndict[n] = Node("state-node", [])
            else:
                rndict[n] = Node(n.name, [])

        nj_net = nx.relabel_nodes(nj_net, rndict)

        cm = pd.read_csv(fn, sep="\t", index_col=0)

        # Map each "|"-joined character string to its row label.
        cm_lookup = dict(
            zip(
                list(
                    cm.apply(lambda x: "|".join([str(k) for k in x.values]),
                             axis=1)),
                cm.index.values,
            ))

        nj_net = fill_in_tree(nj_net, cm)

        nj_net = tree_collapse(nj_net)

        for n in nj_net:
            if n.char_string in cm_lookup:
                n.is_target = True

        nj_net = Cassiopeia_Tree("neighbor-joining", network=nj_net)
        if outfp is None:
            outfp = name.replace("true", "nj")
        _dump_pickle(nj_net, outfp)

        # Remove the intermediate files.
        os.system("rm " + infile)
        os.system("rm " + fn)

    elif args.neighbor_joining_weighted:

        if verbose:
            print("Running Neighbor-Joining with Weighted Scoring on " +
                  str(len(target_nodes_uniq)) + " Unique Cells")

        target_node_charstrings = np.array(
            [t.get_character_vec() for t in target_nodes_uniq])
        dm = compute_distance_mat(target_node_charstrings,
                                  len(target_node_charstrings),
                                  priors=prior_probs)

        ids = [t.name for t in target_nodes_uniq]
        cm_uniq = pd.DataFrame(target_node_charstrings)
        cm_uniq.index = ids
        dm = sp.spatial.distance.squareform(dm)

        dm = DistanceMatrix(dm, ids)

        newick_str = nj(dm, result_constructor=str)

        tree = newick_to_network(newick_str, cm_uniq)

        nj_net = fill_in_tree(tree, cm_uniq)
        nj_net = tree_collapse(nj_net)

        # Map each "|"-joined character string to its row label.
        cm_lookup = dict(
            zip(
                list(
                    cm_uniq.apply(
                        lambda x: "|".join([str(k) for k in x.values]),
                        axis=1)),
                cm_uniq.index.values,
            ))

        for n in nj_net:
            n.is_target = n.char_string in cm_lookup

        nj_net = Cassiopeia_Tree("neighbor-joining", network=nj_net)
        if outfp is None:
            outfp = name.replace("true", "nj_weighted")
        _dump_pickle(nj_net, outfp)

    elif args.camin_sokal:

        if verbose:
            print("Running Camin-Sokal Max Parsimony Algorithm on " +
                  str(len(target_nodes_uniq)) + " Unique Cells")

        # Rename the leaves to their index for the external tools and keep
        # the mapping back to the original names.
        samples_to_cells = {}
        indices = []
        for i, n in enumerate(target_nodes_uniq):
            samples_to_cells["s" + str(i)] = n.name
            indices.append(n.name)
            n.name = str(i)

        infile = "".join(name.split(".")[:-1]) + "_cs_infile.txt"
        fn = "".join(name.split(".")[:-1]) + "_cs_phylo.txt"
        weights_fn = "".join(name.split(".")[:-1]) + "_cs_weights.txt"
        write_leaves_to_charmat(target_nodes_uniq, fn)

        script = SCLT_PATH / "TreeSolver" / "binarize_multistate_charmat.py"
        cmd = "python3.6 " + str(script) + " " + fn + " " + infile
        pi = subprocess.Popen(cmd, shell=True)
        os.waitpid(pi.pid, 0)

        # Called for its effect on weights_fn, which is passed to mix below.
        # The return value is not needed.
        construct_weights(infile, weights_fn)

        os.system("touch outfile")
        os.system("touch outtree")

        outfile = stem + "outfile.txt"
        outtree = stem + "outtree.txt"
        # Run phylip mix with camin-sokal. The answers to its interactive
        # prompts are fed from a temporary responses file.
        responses = "." + stem + ".temp.txt"
        with open(responses, "w") as FH:
            FH.write(infile + "\n")
            FH.write("F\n" + outfile + "\n")
            FH.write("P\n")
            FH.write("W\n")
            FH.write("Y\n")
            FH.write(weights_fn + "\n")
            FH.write("F\n" + outtree + "\n")

        t0 = time.time()
        cmd = "~/software/phylip-3.697/exe/mix"
        cmd += " < " + responses + " > screenout1"
        p = subprocess.Popen(cmd, shell=True)
        os.waitpid(p.pid, 0)

        consense_outtree = stem + "consenseouttree.txt"
        consense_outfile = stem + "consenseoutfile.txt"

        # Reuse the responses file for the consense run.
        with open(responses, "w") as FH:
            FH.write(outtree + "\n")
            FH.write("F\n" + consense_outfile + "\n")
            FH.write("Y\n")
            FH.write("F\n" + consense_outtree + "\n")

        if verbose:
            print("Computing Consensus Tree, elasped time: " +
                  str(time.time() - t0))

        cmd = "~/software/phylip-3.697/exe/consense"
        cmd += " < " + responses + " > screenout"
        p2 = subprocess.Popen(cmd, shell=True)
        os.waitpid(p2.pid, 0)

        # Join the consensus newick file into a single string.
        with open(consense_outtree, "r") as f:
            newick_str = "".join(line.strip() for line in f)

        cm = pd.read_csv(fn, sep="\t", index_col=0, dtype=str)
        cm.index = indices

        cs_net = newick_to_network(newick_str, cm)

        # Restore the original cell names on the nodes.
        for n in cs_net:
            if n.name in samples_to_cells:
                n.name = samples_to_cells[n.name]

        cs_net = fill_in_tree(cs_net, cm)

        cs_net = tree_collapse2(cs_net)

        # Map each "|"-joined character string to its row label.
        cm_lookup = dict(
            zip(
                list(
                    cm.apply(lambda x: "|".join([str(k) for k in x.values]),
                             axis=1)),
                cm.index.values,
            ))

        for n in cs_net:
            if n.char_string in cm_lookup:
                n.is_target = True

        cs_net = Cassiopeia_Tree("camin-sokal", network=cs_net)
        if outfp is None:
            outfp = name.replace("true", "cs")
        _dump_pickle(cs_net, outfp)

        # Remove the intermediate files.
        os.system("rm " + outfile)
        os.system("rm " + responses)
        os.system("rm " + outtree)
        os.system("rm " + consense_outfile)
        os.system("rm " + infile)
        os.system("rm " + fn)

    else:

        raise Exception(
            "Please choose an algorithm from the list: greedy, hybrid, ilp, nj, or camin-sokal"
        )
Esempio n. 26
0
# CAGTTCGCCACAA Gamma

# Several things can be done with the alignment, e.g. getting a distance
# matrix from it:
dstcalc = DistanceCalculator('identity')
dm = dstcalc.get_distance(aln)
# DistanceMatrix(names=['Alpha', 'Beta', 'Gamma', 'Delta', 'Epsilon'], matrix=[[0], [0.23076923076923073, 0], [0.3846153846153846, 0.23076923076923073, 0], [0.5384615384615384, 0.5384615384615384, 0.5384615384615384, 0], [0.6153846153846154, 0.3846153846153846, 0.46153846153846156, 0.15384615384615385, 0]])
# Single-argument print(...) calls behave the same on Python 2 and Python 3.
print("What's the get_distance(aln) from DistanceCalculator('identity') object?")
print(type(dm))
print(dm)
# Alpha   0
# Beta    0.230769230769  0
# Gamma   0.384615384615  0.230769230769  0
# Delta   0.538461538462  0.538461538462  0.538461538462  0
# Epsilon 0.615384615385  0.384615384615  0.461538461538  0.153846153846  0

# Build a tree from it.
from Bio.Phylo.TreeConstruction import DistanceTreeConstructor

construc0 = DistanceTreeConstructor(dstcalc, 'nj')
tre0 = construc0.build_tree(aln)
print(type(tre0))
# As shown above, the calculator is needed for the constructor, and the
# alignment is then needed to build the tree, so both must come from the
# same source.
# A tree can also be built from a distance matrix alone: create the
# constructor without arguments and call its .nj method instead of
# build_tree.

construc2 = DistanceTreeConstructor()
tre2 = construc2.nj(dm)
print(type(tre2))
Esempio n. 27
0
    args = parser.parse_args()

    records = [x for x in SeqIO.parse(args.fasta, 'fasta')]

    aligned = align_clustalW_records(records)

    seqCount = 0

    for idx, cluster in enumerate(aligned):
        seqCount += 1
        #print(aligned[idx].id + "\t" + aligned[idx].seq)

    calculator = DistanceCalculator('identity')
    constructor = DistanceTreeConstructor(calculator, 'upgma')

    tree = constructor.build_tree(aligned)
    dm = calculator.get_distance(aligned)

    #tree.ladderize()  # Flip branches so deeper clades are displayed at top
    #Phylo.draw_ascii(tree)

    newmat = dm.matrix
    nnmat = []
    for elem in newmat:
        nnmat.append(elem[:-1])

    newe = None
    olde = None

    nclusts = 0
def run_nj_naive(cm_uniq, stem, verbose=True):
    """Reconstruct a tree from a character matrix with Neighbor-Joining.

    The matrix is written to ``<stem>phylo.txt``, binarized into
    ``<stem>infile.txt`` by the external ``binarize_multistate_charmat.py``
    script, read back as a relaxed-PHYLIP alignment, and turned into a
    midpoint-rooted NJ tree built from identity distances. Both temporary
    files are removed before returning.

    Args:
        cm_uniq: pandas DataFrame with one row per unique cell (indexed by
            cell name) and one column per character.
        stem: Path prefix used to name the two temporary files.
        verbose: When true, print how many unique cells are processed.

    Returns:
        A ``Cassiopeia_Tree`` built with ``method='neighbor-joining'`` whose
        nodes carry an ``is_target`` flag.
    """

    if verbose:
        print("Running Neighbor-Joining on " + str(cm_uniq.shape[0]) +
              " Unique Cells")

    # "|"-joined character string of every cell. Values are stringified first
    # so numeric state columns do not make str.join raise; a set gives O(1)
    # membership tests in the leaf loop below.
    cm_lookup = set(
        cm_uniq.apply(lambda x: "|".join([str(k) for k in x.values]), axis=1))

    fn = stem + "phylo.txt"
    infile = stem + "infile.txt"

    cm_uniq.to_csv(fn, sep='\t')

    script = (SCLT_PATH / 'TreeSolver' / 'binarize_multistate_charmat.py')
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact; call() blocks until the script has finished.
    subprocess.call(["python3.6", str(script), fn, infile, "--relaxed"])

    aln = AlignIO.read(infile, "phylip-relaxed")

    calculator = DistanceCalculator('identity')
    constructor = DistanceTreeConstructor(calculator, 'nj')
    tree = constructor.build_tree(aln)

    tree.root_at_midpoint()

    nj_net = Phylo.to_networkx(tree)

    # Replace Bio.Phylo clades with Node objects.
    rndict = {}
    for n in nj_net:

        if n.name is None:
            rndict[n] = Node('state-node', [])
        # `name in DataFrame` tests the *columns*; the cell names live in the
        # row index, which is also what `.loc[n.name]` looks up.
        elif n.name in cm_uniq.index:
            rndict[n] = Node(n.name, cm_uniq.loc[n.name].values)

    nj_net = nx.relabel_nodes(nj_net, rndict)

    # A node is a target when it is a leaf whose character string occurs in
    # the input matrix.
    for n in nj_net:
        n.is_target = (nj_net.out_degree(n) == 0
                       and n.char_string in cm_lookup)

    ret_tree = Cassiopeia_Tree(method='neighbor-joining',
                               network=nj_net,
                               name='Cassiopeia_state_tree')

    # Best-effort cleanup of the temporary files (a missing file is ignored,
    # matching the previous `rm` behaviour).
    for tmp_path in (infile, fn):
        try:
            os.remove(tmp_path)
        except OSError:
            pass

    return ret_tree
Esempio n. 29
0
def _strip_extension(path):
    """Return ``path`` without its final extension.

    Unlike ``"".join(path.split(".")[:-1])`` this keeps directory parts such
    as ``./`` or ``../`` and any inner dots, and returns ``path`` unchanged
    when it has no extension.
    """
    return os.path.splitext(path)[0]


def _target_nodes_from_matrix(cm_uniq):
    """Return one ``"<state|state|...>_<cell name>"`` string per matrix row."""
    states = cm_uniq.astype(str).apply(lambda x: '|'.join(x), axis=1)
    # A real list (not a one-shot ``map`` iterator) so the solver may take
    # its length or iterate it more than once on Python 3.
    return [s + "_" + n for s, n in zip(states, cm_uniq.index)]


def _parsimony_score(network):
    """Sum ``get_edge_length`` over every edge of ``network``."""
    return sum(get_edge_length(e[0], e[1]) for e in network.edges())


def _dump_pickle(obj, path):
    """Pickle ``obj`` to ``path`` and close the file handle."""
    with open(path, "wb") as fh:
        pic.dump(obj, fh)


def _write_text(path, text):
    """Write ``text`` to ``path``, replacing any previous content."""
    with open(path, "w") as fh:
        fh.write(text)


def _name_internal_nodes(network):
    """Give every unnamed node of ``network`` the name ``internal<i>``."""
    i = 0
    for n in network:
        if n.name is None:
            n.name = "internal" + str(i)
            i += 1


def _remove_files(*paths):
    """Delete temporary files, ignoring the ones that do not exist."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def main():
    """
    Takes in a character matrix, an algorithm, and an output file and
    writes a tree in newick format.

    Command line:
        char_fp                tab-separated character matrix (cells x characters)
        out_fp                 file receiving the newick string; a pickle of the
                               network is written next to it as ``<out_fp stem>.pkl``
        --greedy/-g, --hybrid, --ilp, -nj/--neighbor-joining,
        --camin-sokal/-cs, --max-likelihood/-ml
                               algorithm selection; the first flag set, in
                               this order, wins

    Raises:
        Exception: if no algorithm flag was given.
    """

    parser = argparse.ArgumentParser()
    parser.add_argument("char_fp", type=str, help="character_matrix")
    parser.add_argument("out_fp", type=str, help="output file name")
    parser.add_argument("-nj",
                        "--neighbor-joining",
                        action="store_true",
                        default=False)
    parser.add_argument("--ilp", action="store_true", default=False)
    parser.add_argument("--hybrid", action="store_true", default=False)
    parser.add_argument("--cutoff",
                        type=int,
                        default=80,
                        help="Cutoff for ILP during Hybrid algorithm")
    parser.add_argument("--time_limit",
                        type=int,
                        default=1500,
                        help="Time limit for ILP convergence")
    parser.add_argument("--greedy", "-g", action="store_true", default=False)
    parser.add_argument("--camin-sokal",
                        "-cs",
                        action="store_true",
                        default=False)
    # The max-likelihood branch used to test an undefined variable (``alg``),
    # so it could never be selected; expose it as a regular flag.
    parser.add_argument("--max-likelihood",
                        "-ml",
                        action="store_true",
                        default=False)
    parser.add_argument("--verbose",
                        action="store_true",
                        default=False,
                        help="output verbosity")
    parser.add_argument("--mutation_map", type=str, default="")
    parser.add_argument("--num_threads", type=int, default=1)
    parser.add_argument("--max_neighborhood_size", type=int, default=10000)

    args = parser.parse_args()

    char_fp = args.char_fp
    out_fp = args.out_fp
    verbose = args.verbose

    cutoff = args.cutoff
    time_limit = args.time_limit
    num_threads = args.num_threads

    max_neighborhood_size = args.max_neighborhood_size

    # Prefixes for temporary files (next to the input) and for the pickle
    # (next to the output).
    stem = _strip_extension(char_fp)
    out_stem = _strip_extension(out_fp)

    cm = pd.read_csv(char_fp, sep='\t', index_col=0)
    cm_uniq = cm.drop_duplicates(inplace=False)

    prior_probs = None
    if args.mutation_map != "":

        prior_probs = read_mutation_map(args.mutation_map)

    if args.greedy:

        target_nodes = _target_nodes_from_matrix(cm_uniq)

        if verbose:
            print('Running Greedy Algorithm on ' + str(len(target_nodes)) +
                  " Cells")

        reconstructed_network_greedy = solve_lineage_instance(
            target_nodes, method="greedy", prior_probabilities=prior_probs)

        print("Parsimony: " +
              str(_parsimony_score(reconstructed_network_greedy)))

        newick = convert_network_to_newick_format(reconstructed_network_greedy)
        _write_text(out_fp, newick)

        _dump_pickle(reconstructed_network_greedy, out_stem + ".pkl")

    elif args.hybrid:

        target_nodes = _target_nodes_from_matrix(cm_uniq)

        if verbose:
            print('Running Hybrid Algorithm on ' + str(len(target_nodes)) +
                  " Cells")
            print('Parameters: ILP on sets of ' + str(cutoff) + ' cells ' +
                  str(time_limit) + 's to complete optimization')

        print("running algorithm...")
        reconstructed_network_hybrid = solve_lineage_instance(
            target_nodes,
            method="hybrid",
            hybrid_subset_cutoff=cutoff,
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            threads=num_threads,
            max_neighborhood_size=max_neighborhood_size)

        if verbose:
            print("Scoring Parsimony...")

        score = _parsimony_score(reconstructed_network_hybrid)

        if verbose:
            print("Parsimony: " + str(score))
            print("Writing the tree to output...")

        _dump_pickle(reconstructed_network_hybrid, out_stem + ".pkl")

        newick = convert_network_to_newick_format(reconstructed_network_hybrid)
        _write_text(out_fp, newick)

    elif args.ilp:

        target_nodes = _target_nodes_from_matrix(cm_uniq)

        if verbose:
            print("Running ILP Algorithm on " + str(len(target_nodes)) +
                  " Unique Cells")
            print("Paramters: ILP allowed " + str(time_limit) +
                  "s to complete optimization")

        reconstructed_network_ilp = solve_lineage_instance(
            target_nodes,
            method="ilp",
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            max_neighborhood_size=max_neighborhood_size)

        print("Parsimony: " + str(_parsimony_score(reconstructed_network_ilp)))

        newick = convert_network_to_newick_format(reconstructed_network_ilp)
        _write_text(out_fp, newick)

        _dump_pickle(reconstructed_network_ilp, out_stem + ".pkl")

    elif args.neighbor_joining:

        cm.drop_duplicates(inplace=True)

        if verbose:
            print("Running Neighbor-Joining on " + str(cm.shape[0]) +
                  " Unique Cells")

        fn = stem + "phylo.txt"
        infile = stem + "infile.txt"

        cm.to_csv(fn, sep='\t')

        # NOTE(review): the binarization script path is hard-coded to one
        # user's checkout; the shell is needed for the ``~`` expansion.
        os.system(
            "python2 ~/projects/scLineages/SingleCellLineageTracing/scripts/binarize_multistate_charmat.py "
            + fn + " " + infile + " --relaxed")
        aln = AlignIO.read(infile, "phylip-relaxed")

        calculator = DistanceCalculator('identity')
        constructor = DistanceTreeConstructor(calculator, 'nj')
        tree = constructor.build_tree(aln)

        tree.root_at_midpoint()

        nj_net = Phylo.to_networkx(tree)

        _name_internal_nodes(nj_net)

        # Relabel the Bio.Phylo clade objects with their plain-string names.
        nj_net = nx.relabel_nodes(nj_net, {n: n.name for n in nj_net.nodes()})

        nj_net = tree_collapse(nj_net)

        _dump_pickle(nj_net, out_stem + ".pkl")

        newick = convert_network_to_newick_format(nj_net)
        _write_text(out_fp, newick)

        _remove_files(infile, fn)

    elif args.camin_sokal:

        # PHYLIP gets short surrogate row labels; the mapping restores the
        # real cell names on the final network.
        cells = cm.index
        samples = [("s" + str(i)) for i in range(len(cells))]
        samples_to_cells = dict(zip(samples, cells))

        cm.index = list(range(len(cells)))

        if verbose:
            print("Running Camin-Sokal on " + str(cm.shape[0]) +
                  " Unique Cells")

        infile = stem + 'infile.txt'
        fn = stem + "phylo.txt"
        weights_fn = stem + "weights.txt"

        cm.to_csv(fn, sep='\t')

        # NOTE(review): hard-coded, user-specific script location.
        os.system(
            "python2 /home/mattjones/projects/scLineages/SingleCellLineageTracing/scripts/binarize_multistate_charmat.py "
            + fn + " " + infile)

        construct_weights(infile, weights_fn)

        outfile = stem + 'outfile.txt'
        outtree = stem + 'outtree.txt'

        # Hidden file holding the answers piped into the interactive PHYLIP
        # programs; placed next to the input so that a stem containing a
        # directory still yields a valid path.
        responses = os.path.join(os.path.dirname(stem),
                                 "." + os.path.basename(stem) + ".temp.txt")

        # run phylip mix with camin-sokal
        with open(responses, 'w') as fh:
            fh.write(infile + "\n")
            fh.write("F\n" + outfile + "\n")
            fh.write("P\n")
            fh.write("Y\n")
            fh.write("F\n" + outtree + "\n")

        t0 = time.time()
        cmd = "~/software/phylip-3.697/exe/mix"
        cmd += " < " + responses + " > screenout"
        subprocess.call(cmd, shell=True)  # shell needed for ~ and redirection

        consense_outtree = stem + "consenseouttree.txt"
        consense_outfile = stem + "conenseoutfile.txt"

        with open(responses, "w") as fh:
            fh.write(outtree + "\n")
            fh.write("F\n" + consense_outfile + "\n")
            fh.write("Y\n")
            fh.write("F\n" + consense_outtree + "\n")

        if verbose:
            print("Computing Consensus Tree, elasped time: " +
                  str(time.time() - t0))

        cmd = "~/software/phylip-3.697/exe/consense"
        cmd += " < " + responses + " > screenout2"
        subprocess.call(cmd, shell=True)

        # The consensus newick may span several lines; join them into one.
        with open(consense_outtree, "r") as f:
            newick_str = "".join(line.strip() for line in f)

        tree = newick_to_network(newick_str)
        cs_net = tree_collapse(tree)

        cs_net = nx.relabel_nodes(cs_net, samples_to_cells)

        _dump_pickle(cs_net, out_stem + ".pkl")

        newick = convert_network_to_newick_format(cs_net)
        _write_text(out_fp, newick)

        _remove_files(outfile, responses, outtree, consense_outfile, infile,
                      fn)

    elif args.max_likelihood:

        if verbose:
            print("Running Max-Likelihood on " + str(cm.shape[0]) +
                  " Unique Cells")

        infile = stem + 'infile.txt'
        fn = stem + "phylo.txt"

        cm.to_csv(fn, sep='\t')

        # NOTE(review): hard-coded, user-specific script and FastTree paths.
        os.system(
            "python2 /home/mattjones/projects/scLineages/SingleCellLineageTracing/scripts/binarize_multistate_charmat.py "
            + fn + " " + infile + " --relaxed")

        # FastTree writes the newick tree straight into out_fp.
        os.system("/home/mattjones/software/FastTreeMP < " + infile + " > " +
                  out_fp)

        # next() works on Python 2 and 3; the .next() method is Python 2 only.
        tree = next(Phylo.parse(out_fp, "newick"))

        ml_net = Phylo.to_networkx(tree)

        _name_internal_nodes(ml_net)

        ml_net = nx.relabel_nodes(ml_net, {n: str(n) for n in ml_net.nodes()})

        _dump_pickle(ml_net, out_stem + ".pkl")

        _remove_files(infile, fn)

    else:

        raise Exception(
            "Please choose an algorithm from the list: greedy, hybrid, ilp, nj, max-likelihood, or camin-sokal"
        )
Esempio n. 30
0
# NOTE: single-argument print(...) behaves identically on Python 2 and 3,
# whereas the bare `print x` statement is a syntax error on Python 3.
print(distanceMatrix)

aln = AlignIO.read('/home/harsheel/CPME Assignments/genealign.fasta', 'fasta')
print(aln)

# Pairwise identity distances between the aligned sequences.
calculator = DistanceCalculator('identity')
dm = calculator.get_distance(aln)
print(dm)


# One constructor per clustering method, both driven by the same calculator.
constructor1 = DistanceTreeConstructor(calculator,'upgma')  #rooted=True
constructor2 = DistanceTreeConstructor(calculator,'nj')  #rooted=False

#FH=open('/home/harsheel/CPME Assignments/distanceMatrixTest.txt','r')
tree1=constructor1.build_tree(aln)
tree2=constructor2.build_tree(aln)

Bio.Phylo.draw_ascii(tree1)
#Bio.Phylo.draw(tree1)

Bio.Phylo.draw_ascii(tree2)
#Bio.Phylo.draw(tree2)

#tree = tree.as_phyloxml()
#tree1.print_newick()

print(tree1)
print(tree2)

# Only the UPGMA tree is saved to disk.
Bio.Phylo.write(tree1, 'tree1.tre', 'newick')
Esempio n. 31
0
def _cleanup_temp_files(paths):
    """Delete each path in ``paths``, ignoring files that do not exist."""
    for path in paths:
        try:
            os.remove(path)
        except OSError:
            pass


def main():
    """
    Stress-test driver: loads a pickled ground-truth network, reconstructs a
    tree from its leaves with the selected algorithm and stores the result in
    the current directory.

    Command line:
        netfp   pickled true network; the file name is expected to look like
                ``..._<param>_<x>_<run>.<ext>`` (``param`` and ``run`` are
                parsed from it and reported by the neighbor-joining branch)
        typ     category of stress test (only echoed in the NJ report line)
        --greedy/-g, --hybrid, --ilp, -nj/--neighbor-joining, --camin-sokal/-cs
                algorithm selection; the first flag set, in this order, wins

    Raises:
        Exception: if no algorithm flag was given.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("netfp", type=str, help="character_matrix")
    parser.add_argument("typ", type=str, help="category of stress test")
    parser.add_argument("-nj",
                        "--neighbor-joining",
                        action="store_true",
                        default=False)
    parser.add_argument("--ilp", action="store_true", default=False)
    parser.add_argument("--hybrid", action="store_true", default=False)
    parser.add_argument("--cutoff",
                        type=int,
                        default=80,
                        help="Cutoff for ILP during Hybrid algorithm")
    parser.add_argument("--time_limit",
                        type=int,
                        default=1500,
                        help="Time limit for ILP convergence")
    parser.add_argument("--greedy", "-g", action="store_true", default=False)
    parser.add_argument("--camin-sokal",
                        "-cs",
                        action="store_true",
                        default=False)
    parser.add_argument("--verbose",
                        action="store_true",
                        default=False,
                        help="output verbosity")
    parser.add_argument("--mutation_map", type=str, default="")
    parser.add_argument("--num_threads", type=int, default=1)
    parser.add_argument("--no_triplets", action="store_true", default=False)
    # Declared as int so a value given on the command line has the same type
    # as the (integer) default instead of arriving as a string.
    parser.add_argument("--max_neighborhood_size", type=int, default=3000)

    args = parser.parse_args()

    netfp = args.netfp
    t = args.typ
    verbose = args.verbose

    cutoff = args.cutoff
    time_limit = args.time_limit
    num_threads = args.num_threads
    max_neighborhood_size = args.max_neighborhood_size

    score_triplets = (not args.no_triplets)

    # Run metadata is encoded in the underscore-separated file name.
    name = netfp.split("/")[-1]
    spl = name.split("_")
    param = spl[-3]
    run = spl[-1].split(".")[0]

    prior_probs = None
    if args.mutation_map != "":

        with open(args.mutation_map, "rb") as fh:
            prior_probs = pic.load(fh)

    stem = '.'.join(name.split(".")[:-1])

    true_network = nx.read_gpickle(netfp)

    target_nodes = get_leaves_of_tree(true_network, clip_identifier=True)
    target_nodes_original_network = get_leaves_of_tree(true_network,
                                                       clip_identifier=False)

    # Must be a list: on Python 3 a ``map`` object would be exhausted by the
    # first zip() below, leaving the second mapping empty.
    k = ["s" + x.split("_")[-1] for x in target_nodes_original_network]
    s_to_char = dict(zip(k, target_nodes))
    char_to_s = dict(zip(target_nodes, k))

    # Indices of the first occurrence of every distinct leaf state.
    unique_ii = np.unique(target_nodes, return_index=True)
    target_nodes_uniq = np.array(target_nodes)[unique_ii[1]]
    target_nodes_original_network_uniq = np.array(
        target_nodes_original_network)[unique_ii[1]]

    string_to_sample = dict(zip(target_nodes, target_nodes_original_network))

    if args.greedy:

        if verbose:
            print('Running Greedy Algorithm on ' +
                  str(len(target_nodes_uniq)) + " Cells")

        reconstructed_network_greedy = solve_lineage_instance(
            target_nodes_uniq,
            method="greedy",
            prior_probabilities=prior_probs)

        newick = convert_network_to_newick_format(reconstructed_network_greedy)
        out = stem + "_greedy.txt"

        with open(name.replace("true", "greedy"), "wb") as fh:
            pic.dump(reconstructed_network_greedy, fh)

    elif args.hybrid:

        if verbose:
            print('Running Hybrid Algorithm on ' +
                  str(len(target_nodes_uniq)) + " Cells")
            print('Parameters: ILP on sets of ' + str(cutoff) + ' cells ' +
                  str(time_limit) + 's to complete optimization')

        reconstructed_network_hybrid = solve_lineage_instance(
            target_nodes_original_network_uniq,
            method="hybrid",
            hybrid_subset_cutoff=cutoff,
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            threads=num_threads,
            max_neighborhood_size=max_neighborhood_size)

        reconstructed_network_hybrid = nx.relabel_nodes(
            reconstructed_network_hybrid, string_to_sample)

        with open(name.replace("true", "hybrid"), "wb") as fh:
            pic.dump(reconstructed_network_hybrid, fh)

    elif args.ilp:

        if verbose:
            print('Running ILP Algorithm on ' +
                  str(len(target_nodes_uniq)) + " Cells")
            print('Parameters: ILP on sets of ' + str(cutoff) + ' cells ' +
                  str(time_limit) + 's to complete optimization')

        reconstructed_network_ilp = solve_lineage_instance(
            target_nodes_uniq,
            method="ilp",
            hybrid_subset_cutoff=cutoff,
            prior_probabilities=prior_probs,
            time_limit=time_limit,
            threads=num_threads)

        reconstructed_network_ilp = nx.relabel_nodes(reconstructed_network_ilp,
                                                     string_to_sample)
        with open(name.replace("true", "ilp"), "wb") as fh:
            pic.dump(reconstructed_network_ilp, fh)

    elif args.neighbor_joining:

        if verbose:
            print("Running Neighbor-Joining on " +
                  str(len(target_nodes_uniq)) + " Unique Cells")

        infile = ''.join(name.split(".")[:-1]) + 'infile.txt'
        fn = ''.join(name.split(".")[:-1]) + 'phylo.txt'
        write_leaves_to_charmat(target_nodes_original_network_uniq, fn)

        script = (SCLT_PATH / 'TreeSolver' / 'binarize_multistate_charmat.py')
        # Argument list instead of a shell string: no quoting problems, and
        # call() blocks until the script has finished.
        subprocess.call(["python3.6", str(script), fn, infile, "--relaxed"])

        # NOTE(review): the script is run with --relaxed but the result is
        # parsed as strict "phylip" -- confirm this is intended.
        aln = AlignIO.read(infile, "phylip")

        aln = unique_alignments(aln)

        t0 = time.time()
        calculator = DistanceCalculator('identity', skip_letters='?')
        constructor = DistanceTreeConstructor(calculator, 'nj')

        tree = constructor.build_tree(aln)

        out = stem + "_nj.txt"
        Phylo.write(tree, out, 'newick')

        # Tab-separated report: param, run, method, test type, seconds.
        print(
            str(param) + "\t" + str(run) + "\t" + "neighbor-joining" + "\t" +
            str(t) + "\t" + str(time.time() - t0))

        _cleanup_temp_files([infile, fn])

    elif args.camin_sokal:

        if verbose:
            print('Running Camin-Sokal Max Parsimony Algorithm on ' +
                  str(len(target_nodes_uniq)) + " Unique Cells")

        infile = ''.join(name.split(".")[:-1]) + '_cs_infile.txt'
        fn = ''.join(name.split(".")[:-1]) + '_cs_phylo.txt'
        weights_fn = ''.join(name.split(".")[:-1]) + "_cs_weights.txt"
        write_leaves_to_charmat(target_nodes_original_network_uniq, fn)

        script = (SCLT_PATH / 'TreeSolver' / 'binarize_multistate_charmat.py')
        subprocess.call(["python3.6", str(script), fn, infile, "--relaxed"])

        weights = construct_weights(infile, weights_fn)

        outfile = stem + 'outfile.txt'
        outtree = stem + 'outtree.txt'
        # Hidden file holding the answers piped into the interactive PHYLIP
        # programs.
        responses = "." + stem + ".temp.txt"

        # run phylip mix with camin-sokal
        with open(responses, 'w') as fh:
            fh.write(infile + "\n")
            fh.write("F\n" + outfile + "\n")
            fh.write("P\n")
            fh.write("W\n")
            fh.write("Y\n")
            fh.write(weights_fn + "\n")
            fh.write("F\n" + outtree + "\n")

        t0 = time.time()
        cmd = "~/software/phylip-3.697/exe/mix"
        cmd += " < " + responses + " > screenout1"
        subprocess.call(cmd, shell=True)  # shell needed for ~ and redirection

        consense_outtree = stem + "consenseouttree.txt"
        consense_outfile = stem + "consenseoutfile.txt"

        with open(responses, "w") as fh:
            fh.write(outtree + "\n")
            fh.write("F\n" + consense_outfile + "\n")
            fh.write("Y\n")
            fh.write("F\n" + consense_outtree + "\n")

        if verbose:
            print("Computing Consensus Tree, elasped time: " +
                  str(time.time() - t0))

        cmd = "~/software/phylip-3.697/exe/consense"
        cmd += " < " + responses + " > screenout"
        subprocess.call(cmd, shell=True)

        # The consensus tree (consense_outtree) is left on disk as the result.
        _cleanup_temp_files(
            [outfile, responses, outtree, consense_outfile, infile, fn])

    else:

        raise Exception(
            "Please choose an algorithm from the list: greedy, hybrid, ilp, nj, or camin-sokal"
        )