Ejemplo n.º 1
0
def test_run_local_alignment():
    """Drive the alignment pipeline end to end for the first candidate.

    Loads the candidate sequences, writes the first one to a FASTA file
    under ``./tmp``, runs the local alignment against the genomes listed in
    ``./genome_list.table``, prepares the records for the multiple
    alignment step and finally runs that step. Intermediate results are
    printed to stdout; nothing is asserted or returned.
    """
    separator = "*" * 100

    def announce(title):
        # Frame a section title between two separator lines.
        print(separator)
        print(title)
        print(separator)

    candidates = get_seq_list("./NC_000913.fna.gz", "./candidate_list.table")

    # Each entry is a (name, seq_obj) tuple; only the first one is used here.
    first_name = candidates[0][0]
    first_seq = candidates[0][1]

    # The FASTA path doubles as the record's id and name.
    fasta_path = "./tmp/%s.fasta" % first_name
    base_record = SeqRecord(first_seq, id=fasta_path, name=fasta_path)
    SeqIO.write(base_record, fasta_path, "fasta")

    alignments = run_local_alignment(fasta_path, "./genome_list.table")

    announce("local alignments results")
    for hit in alignments:
        print(hit)

    emma_records = prepare_for_multiple_aignment(base_record, alignments)

    announce("records for emma")
    for entry in emma_records:
        print(entry)

    announce("Running Emma")
    run_multiple_sequence_alignment(emma_records)
Ejemplo n.º 2
0
def test_run_local_alignment():
	"""Run the whole alignment flow on the first candidate sequence.

	Steps, in order: read the candidates, dump the first one as a FASTA
	file in ``./tmp``, locally align it against ``./genome_list.table``,
	build the records for the multiple alignment and run it. Progress is
	reported on stdout only; the function returns nothing.
	"""
	rule = "*" * 100

	def show_section(heading, items):
		# Print a framed heading followed by one line per item.
		print(rule)
		print(heading)
		print(rule)
		for item in items:
			print(item)

	sequences = get_seq_list("./NC_000913.fna.gz", "./candidate_list.table")

	# Entries are (name, seq_obj) tuples; take the first one.
	head = sequences[0]
	out_path = "./tmp/%s.fasta" % head[0]

	# The output path is reused as both id and name of the record.
	base_record = SeqRecord(head[1], id=out_path, name=out_path)
	SeqIO.write(base_record, out_path, "fasta")

	hits = run_local_alignment(out_path, "./genome_list.table")
	show_section("local alignments results", hits)
	# A blank line separated this loop from the next call in the original;
	# no output is produced between the two sections.

	to_align = prepare_for_multiple_aignment(base_record, hits)
	show_section("records for emma", to_align)

	show_section("Running Emma", ())
	run_multiple_sequence_alignment(to_align)
Ejemplo n.º 3
0
def _ensure_dir(path):
	"""Create directory `path` (with missing parents) if it does not exist.

	Replaces shelling out to ``mkdir -p``: no shell is involved, so paths
	containing spaces or shell metacharacters are handled correctly.
	Returns `path` unchanged so the call can be used inline.
	"""
	if not path:
		# Nothing to create for an empty path (relative outputs land in cwd).
		return path
	try:
		os.makedirs(path)
	except OSError:
		# An already existing directory is fine; anything else is a real error.
		if not os.path.isdir(path):
			raise
	return path


def run(base_genome_file,
		candidate_list_file,
		workdir,
		genome_list_file,
		project_to_taxamonies_file,
		tree_file):
	"""Run the full per-candidate pipeline and write results under `workdir`.

	For every candidate sequence: write it as FASTA, run the local
	alignment, save a score histogram, prepare the records for the
	multiple alignment, run the multiple alignment, and run Rate4Site on
	the resulting alignment. Progress is printed to stdout.

	Args:
		base_genome_file: passed to ``get_seq_list`` together with
			`candidate_list_file` to obtain the candidate sequences.
		candidate_list_file: see `base_genome_file`.
		workdir: root output directory; the sub-directories ``logs``,
			``aln``, ``stats``, ``results`` and ``candiates`` are created
			inside it.
		genome_list_file: passed to ``run_local_alignment``.
		project_to_taxamonies_file: source for both project-id to tax-id
			conversion tables.
		tree_file: passed to ``Rate4Site``.

	Raises:
		OSError: if one of the output directories cannot be created.
	"""
	_ensure_dir(workdir)
	for sub_dir in ("logs", "aln", "stats", "results"):
		_ensure_dir(os.path.join(workdir, sub_dir))

	# NOTE: the directory name is spelled "candiates" on disk; kept as-is so
	# existing output layouts keep working.
	candidates_path = _ensure_dir(os.path.join(workdir, "candiates"))

	seq_list = get_seq_list(base_genome_file, candidate_list_file)
	conv_table = build_project_id_to_tax_id_dictionary(project_to_taxamonies_file)
	secondary_conv_table = \
		build_secondary_project_id_to_tax_id_dictionary(project_to_taxamonies_file)

	separator = "*" * 100

	def announce(title):
		# Frame a section title between two separator lines.
		print(separator)
		print(title)
		print(separator)

	# seq_list holds tuples of (name, seq_obj)
	for seq in seq_list:
		seq_name = seq[0]
		seq_obj = seq[1]

		candid_workdir = _ensure_dir(os.path.join(candidates_path, seq_name))

		print(seq)

		# Generate sequence file
		seq_file_name = "%s/%s.fasta" % (candid_workdir, seq_name)
		base_record = SeqRecord(seq_obj, id=seq_name, name=seq_name)
		SeqIO.write(base_record, seq_file_name, "fasta")

		# Create local alignments for the sequence
		# NOTE(review): this passes the module-level WORK_DIR rather than the
		# `workdir` argument -- confirm that is intended.
		local_alignment_list = \
			run_local_alignment(seq_file_name, genome_list_file, WORK_DIR)

		announce("local alignments results")
		for row in local_alignment_list:
			print(row)

		# Generate histogram
		generate_score_histogram(
			"%s/stats/%s.png" % (workdir, seq_name),
			local_alignment_list)

		# Extract only the significant alignments
		records_for_emma = prepare_for_multiple_aignment(
			local_alignment_list,
			conv_table,
			secondary_conv_table)

		announce("records for emma")
		for record in records_for_emma:
			print(record)

		announce("Running Emma")

		# Run emma
		alignment_file = "%s/emma.aln" % candid_workdir
		run_multiple_sequence_alignment(
			records_for_emma,
			alignment_file,
			"%s/emma.dnd" % candid_workdir,
			candid_workdir)

		# Run Rate4Site on the alignment just produced; "./rate4site64" is
		# presumably the path to the Rate4Site executable -- TODO confirm.
		rate_runner = Rate4Site(alignment_file, tree_file, "./rate4site64")
		rate_runner.runRate(outname="%s/results/%s.rate" % (workdir, seq_name))