def test_align_subsets():
    """Check the alignlib column-filtering helpers on a three-row alignment."""
    aln = fasta.FastaDict()
    for name, seq in (("a", "AAA-A"), ("b", "-BD-C"), ("c", "A-D--")):
        aln[name] = seq

    # Column 3 is a gap in every row; it is the only column expected to be
    # dropped by both remove_empty_columns and require_nseqs(aln, 2).
    without_col3 = {'a': 'AAAA', 'c': 'A-D-', 'b': '-BDC'}

    result = alignlib.remove_empty_columns(aln)
    assert result == without_col3

    # Only column 2 is gap-free across all three rows.
    result = alignlib.remove_gapped_columns(aln)
    assert result == {'a': 'A', 'c': 'D', 'b': 'D'}

    result = alignlib.require_nseqs(aln, 2)
    assert result == without_col3
def find_parsimony(tree, align):
    """
    Run parsimony() on a tree and alignment and collect ancestral sequences.

    tree  -- tree object accepted by make_ptree()
    align -- mapping from leaf name to sequence; all sequences are taken to
             have the length of the first value

    Returns a fasta.FastaDict keyed by the names of the non-leaf nodes (in
    the node order returned by make_ptree), holding the contents of the
    ancestral buffers after parsimony() has been called.
    """
    ptree, nodes, _nodelookup = make_ptree(tree)
    nnodes = len(nodes)
    dists = [node.dist for node in nodes]

    nseqs = len(align)
    seqlen = len(align.values()[0])

    # leaf sequences, ordered the same way as the leaves appear in `nodes`
    leaf_seqs = []
    for node in nodes:
        if node.is_leaf():
            leaf_seqs.append(align[node.name])
    calign = (c_char_p * nseqs)(*leaf_seqs)

    # One separately-built placeholder string per ancestral slot.
    # NOTE(review): presumably parsimony() fills these buffers in place, so
    # each slot must stay a distinct string object -- confirm against the
    # C implementation.
    n_ancestral = nseqs - 1
    placeholders = ["-" * seqlen for _ in xrange(n_ancestral)]
    cancestral = (c_char_p * n_ancestral)(*placeholders)

    parsimony(nnodes, ptree, nseqs, calign, dists, True, cancestral)

    # pair the i-th internal node with the i-th ancestral buffer
    ancestral = fasta.FastaDict()
    internal_names = (node.name for node in nodes if not node.is_leaf())
    for slot, name in enumerate(internal_names):
        ancestral[name] = cancestral[slot]
    return ancestral
def make_alignment(arg, mutations, infsites=False):
    """
    Make FASTA alignment from ARG and sampled mutations.

    arg       -- ARG object; this function reads arg.start, arg.end,
                 arg.leaf_names() and arg.get_marginal_tree(pos)
    mutations -- list of mutation records; item 0 is a node (its .name is
                 used) and item 2 is the position.  The list is sorted in
                 place by position.
    infsites  -- if True, keep a single randomly chosen mutated branch per
                 site, with exactly one mutation on it

    Returns a fasta.FastaDict mapping each leaf name to its sequence.
    """
    aln = fasta.FastaDict()
    alnlen = int(arg.end - arg.start)
    leaves = list(arg.leaf_names())
    nleaves = len(leaves)

    # sort mutations by position
    mutations.sort(key=lambda mut: mut[2])
    nmuts = len(mutations)

    # build the alignment one column (site) at a time
    columns = []
    next_mut = 0
    for pos in xrange(alnlen):
        root_base = "ACGT"[random.randint(0, 3)]

        site_has_mut = (next_mut < nmuts and
                        pos >= int(mutations[next_mut][2]))
        if not site_has_mut:
            # no mutation here: every leaf carries the root base
            columns.append(root_base * nleaves)
            continue

        # count the mutations falling on each branch at this site
        hits = defaultdict(int)
        while next_mut < nmuts and pos == int(mutations[next_mut][2]):
            hits[mutations[next_mut][0].name] += 1
            next_mut += 1

        # enforce infinite sites
        if infsites:
            hits = {random.sample(hits.items(), 1)[0][0]: 1}

        # push bases down the marginal tree from the root
        tree = arg.get_marginal_tree(pos - .5)
        bases = {tree.root.name: root_base}
        for node in tree.preorder():
            if not node.parents:
                continue

            base = bases[node.parents[0].name]
            if node.name in hits:
                # each mutation on the branch must change the current base
                for _ in xrange(hits[node.name]):
                    previous = base
                    while base == previous:
                        base = "ACGT"[random.randint(0, 3)]
            bases[node.name] = base

        columns.append("".join(bases[leaf] for leaf in leaves))

    # make fasta: row k of the alignment is the k-th character of each column
    for row, leaf in enumerate(leaves):
        aln[leaf] = "".join(column[row] for column in columns)

    return aln
def __init__(self, genomes, chroms, regions, blocks, orths, **options):
    """
    Initialise the base visualisation, then this class's own state.

    All positional arguments and **options are forwarded unchanged to
    SyntenyVisBase.__init__.
    """
    SyntenyVisBase.__init__(
        self, genomes, chroms, regions, blocks, orths, **options)

    # initial state: "gene" click mode, nothing selected, no sequences
    self.click_mode = "gene"
    self.selgenes = []
    self.seqs = fasta.FastaDict()