Exemple #1
0
import Conf;
import SeqGen;
from Conf import *;
from SeqGen import *;
import generateYaml;
from generateYaml import *;


# Driver: write a YAML configuration under /tmp, build a SeqGen from it,
# generate both sequence sets, embed a motif, and print the results.
# Alternative configuration source: Conf("../conf/SeqGen.yaml")

generateYaml("/tmp")
conf = Conf("/tmp/seq.yml")
seqGen = SeqGen(conf)

# The negative set is generated before the positive set.
seqGen.GenerateRandomSequences("negative")
seqGen.GenerateRandomSequences("positive")

# The returned motif is kept in a module-level name; the embedding call below
# takes no arguments.
motif = seqGen.GenerateMotif()
seqGen.embedMotifInSequence()

# Single-argument print(...) produces the same output as the print statement.
print("Positive Set: ")
for seq in seqGen.GetPositiveSet():
    print(seq)

print("Negative Set: ")
for seq in seqGen.GetNegativeSet():
    print(seq)
Exemple #2
0
import SeqGen
from Conf import *
from SeqGen import *
import generateYaml
from generateYaml import *
from SeqGenUtils import *
import sys

# Driver: create configuration files in the directory given as the first
# command-line argument, then generate a negative and a positive FASTA file
# for every '*.yml' configuration found there.
directory = sys.argv[1]
CreateConfFiles(directory)

for confFile in findFiles(directory, '*.yml'):
    conf = Conf(confFile)
    seqGen = SeqGen(conf)

    # Output files share the configuration file's base name (extension dropped).
    filename = os.path.splitext(os.path.basename(confFile))[0]
    posFastaFile = "/".join((directory, filename + "_pos.fa"))
    negFastaFile = "/".join((directory, filename + "_neg.fa"))

    seqGen.SetPosFileName(posFastaFile)
    seqGen.SetNegFileName(negFastaFile)

    # Second argument is 0 for the negative set and 1 for the positive set.
    seqGen.GenerateRandomSequences("negative", 0)
    seqGen.GenerateRandomSequences("positive", 1)
    # The motif-embedding step is disabled in this driver:
    # seqGen.embedMotifInSequence()
Exemple #3
0
import SeqGen;
from Conf import *;
from SeqGen import *;
import generateYaml;
from generateYaml import *;
from SeqGenUtils import *;

import sys;

# Driver: create configuration files in the directory given as the first
# command-line argument, then generate only the negative FASTA file for every
# '*.yml' configuration found there.
directory = sys.argv[1]
CreateConfFiles(directory)

for confFile in findFiles(directory, '*.yml'):
    conf = Conf(confFile)
    seqGen = SeqGen(conf)

    # The output file shares the configuration file's base name (extension dropped).
    filename = os.path.splitext(os.path.basename(confFile))[0]
    negFastaFile = "/".join((directory, filename + "_neg.fa"))
    seqGen.SetNegFileName(negFastaFile)

    seqGen.GenerateRandomSequences("negative", 0)
    # The motif-embedding step is disabled in this driver:
    # seqGen.embedMotifInSequence()
def main(tree_filename, output_filename, single_cols_size, paired_cols_size):
    """
    Generate sequences along a tree and write the leaf sequences as Stockholm.

    tree_filename --- input newick-format tree filename
    output_filename --- output stockholm filename
    single_cols_size --- number of unpaired positions
    paired_cols_size --- number of paired positions (each pair contributes
                         two alignment columns)

    The tree is read with dendropy, plotted with tree.print_plot(), and
    flattened into a list of (edge_length, parent preorder index) tuples, one
    per non-root node in preorder.  That list is passed to SeqGen.main once
    with the single model and once with the paired model.  For every leaf the
    output row is laid out as:

        first half of the single columns
        + first member of each pair, in order
        + second member of each pair, in reverse order
        + remaining single columns

    Progress information is written to sys.stderr.
    """
    tree = dendropy.Tree.get_from_path(tree_filename, 'newick')
    sys.stderr.write("{0} single cols, {1} paired cols\n".format(
        single_cols_size, paired_cols_size))

    tree.print_plot()

    # Map each node oid to its preorder position once, instead of running a
    # linear list.index() search per node.  setdefault keeps the first
    # position seen, which is what list.index() would have returned.
    preorder_index = {}
    for position, node in enumerate(tree.preorder_node_iter()):
        preorder_index.setdefault(node.oid, position)

    non_root_nodes = tree.preorder_node_iter()
    next(non_root_nodes)  # ignore root (it has no parent node)
    order = [(n.edge_length, preorder_index[n.parent_node.oid])
             for n in non_root_nodes]
    sys.stderr.write("order is {0}\n".format(order))

    single_model = SingleModel(SINGLE_MODEL_FULLPATH)
    paired_model = PairedModel(PAIRED_MODEL_FULLPATH, single_model)
    seqs_s = SeqGen.main(single_model.gtr.R, single_model.Frequency, order,
                         single_cols_size, False)
    seqs_p = SeqGen.main(paired_model.gtr.R, paired_model.Frequency, order,
                         paired_cols_size, True)

    # Split the unpaired columns around the paired region; floor division is
    # explicit so the split does not depend on Python 2 integer semantics.
    l1 = single_cols_size // 2
    l2 = single_cols_size - l1

    # Assemble every record before opening the output file, so a failure here
    # does not leave a truncated/empty file behind.
    seqrecs = []
    for leaf in tree.leaf_nodes():
        i = preorder_index[leaf.oid]
        paired = seqs_p[i]
        # Paired sequences interleave the two members of each pair: even
        # offsets hold the first member, odd offsets the second.
        opening = "".join(paired[k * 2] for k in range(paired_cols_size))
        closing = "".join(paired[k * 2 + 1]
                          for k in range(paired_cols_size - 1, -1, -1))
        s = seqs_s[i][:l1]
        s += opening
        s += closing
        s += seqs_s[i][l1:]
        seqrecs.append(SeqRecord(Seq(s), id=leaf.taxon.label))

    ss_cons = '.' * l1 + '(' * paired_cols_size + ')' * paired_cols_size + '.' * l2
    rf = 'x' * (single_cols_size + 2 * paired_cols_size)

    # now print the generated seqs as stockholm file
    with open(output_filename, 'w') as f:
        MSA.write_stockholm(f, seqrecs, ss_cons, rf)