Esempio n. 1
0
	def test_remove_gaps(self):
		"""Alignment must preserve every input sequence once gaps are removed.

		Builds a sequence from stats.sample_wr(translate.AAs(), 100) plus nine
		copies passed character-by-character through mut(), aligns them with
		muscle.alignSequences, and compares each aligned sequence to its input
		after stripping "-" from both.
		"""
		s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
		# NOTE(review): mut's third argument looks like a gap probability, which
		# would explain why "-" is stripped from the inputs too -- confirm.
		others = [''.join([mut(x, 0.2,0.1) for x in s1]) for i in range(9)]
		seqs = [s1]+others
		als = muscle.alignSequences(seqs)
		# This comparison used to be stored in an unused local; assert it so a
		# dropped or extra sequence fails the test.
		self.assertEqual(len(als), len(seqs))
		for (aligned, original) in zip(als, seqs):
			self.assertEqual(aligned.replace("-",''), original.replace("-",''))
Esempio n. 2
0
def test002():
	"""Check that alignSequences raises MuscleError for an explicit exepath.

	The sequences are built the same way as in the other tests; the only
	difference is the exepath keyword pointing at a path under the user's
	home directory.

	Returns:
		True if muscle.MuscleError was raised, False otherwise.
	"""
	s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
	others = [''.join([mut(x, 0.2,0.1) for x in s1]) for i in range(9)]
	seqs = [s1]+others
	res = False
	try:
		# NOTE(review): presumably this path is expected not to exist, so the
		# call should fail -- confirm against the muscle wrapper.
		muscle.alignSequences(seqs, exepath=os.path.expanduser("~/develop/muscle3.8.13/muscle"))
	except muscle.MuscleError:
		# "except X, e" is Python-2-only syntax; the exception object was unused.
		res = True
	# The outcome used to be computed and then discarded.
	return res
Esempio n. 3
0
def test001():
	"""Round-trip check: alignment must preserve ungapped sequence content.

	Builds a sequence from stats.sample_wr(translate.AAs(), 100) plus nine
	copies passed through mut(), aligns them with muscle.alignSequences, and
	compares each aligned sequence to its input after removing "-" from both.

	Returns:
		True if the alignment has as many sequences as the input and every
		pair matches once gaps are stripped; False otherwise.
	"""
	s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
	others = [''.join([mut(x, 0.2,0.1) for x in s1]) for i in range(9)]
	seqs = [s1]+others
	als = muscle.alignSequences(seqs)
	if len(als) != len(seqs):
		return False
	# Return the computed verdict; the original always returned True, so the
	# check could never fail.
	return all(a.replace("-",'') == s.replace("-",'') for (a, s) in zip(als, seqs))
Esempio n. 4
0
def test001():
    """Round-trip check: alignment must preserve ungapped sequence content.

    Builds a sequence from stats.sample_wr(translate.AAs(), 100) plus nine
    copies passed through mut(), aligns them with muscle.alignSequences, and
    compares each aligned sequence to its input after removing "-" from both.

    Returns:
        True if the alignment has as many sequences as the input and every
        pair matches once gaps are stripped; False otherwise.
    """
    s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
    others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
    seqs = [s1] + others
    als = muscle.alignSequences(seqs)
    if len(als) != len(seqs):
        return False
    # Return the computed verdict; the original always returned True, so the
    # check could never fail.
    return all(
        a.replace("-", '') == s.replace("-", '') for (a, s) in zip(als, seqs))
Esempio n. 5
0
 def test_remove_gaps(self):
     """Alignment must preserve every input sequence once gaps are removed.

     Builds a sequence from stats.sample_wr(translate.AAs(), 100) plus nine
     copies passed character-by-character through mut(), aligns them with
     muscle.alignSequences, and compares each aligned sequence to its input
     after stripping "-" from both.
     """
     s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
     # NOTE(review): mut's third argument looks like a gap probability, which
     # would explain why "-" is stripped from the inputs too -- confirm.
     others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
     seqs = [s1] + others
     als = muscle.alignSequences(seqs)
     # This comparison used to be stored in an unused local; assert it so a
     # dropped or extra sequence fails the test.
     self.assertEqual(len(als), len(seqs))
     for (aligned, original) in zip(als, seqs):
         self.assertEqual(
             aligned.replace("-", ''), original.replace("-", ''))
Esempio n. 6
0
def test002():
    """Check that alignSequences raises MuscleError for an explicit exepath.

    The sequences are built the same way as in the other tests; the only
    difference is the exepath keyword pointing at a path under the user's
    home directory.

    Returns:
        True if muscle.MuscleError was raised, False otherwise.
    """
    s1 = ''.join(stats.sample_wr(translate.AAs(), 100))
    others = [''.join([mut(x, 0.2, 0.1) for x in s1]) for i in range(9)]
    seqs = [s1] + others
    res = False
    try:
        # NOTE(review): presumably this path is expected not to exist, so the
        # call should fail -- confirm against the muscle wrapper.
        muscle.alignSequences(
            seqs, exepath=os.path.expanduser("~/develop/muscle3.8.13/muscle"))
    except muscle.MuscleError:
        # "except X, e" is Python-2-only syntax; the exception object was unused.
        res = True
    # The outcome used to be computed and then discarded.
    return res
Esempio n. 7
0
	def test_gapped_index(self):
		"""Aligning gap-free inputs must return them unchanged apart from "-".

		Builds a sequence from stats.sample_wr(translate.AAs(), 50) plus nine
		copies passed through mut() with a third argument of 0.0, aligns them
		with muscle.alignSequences, and checks that stripping "-" from each
		aligned sequence yields the corresponding input. A MuscleError from
		the aligner fails the test.
		"""
		s1 = ''.join(stats.sample_wr(translate.AAs(), 50))
		# No gaps: pgap = 0.0
		others = [''.join([mut(x, 0.2,0.0) for x in s1]) for i in range(9)]
		seqs = [s1]+others
		# Only the aligner call can raise MuscleError, so keep the try minimal
		# and report the error text instead of a bare assertTrue(False).
		try:
			als = muscle.alignSequences(seqs)
		except muscle.MuscleError as me:
			self.fail("muscle.alignSequences raised MuscleError: {}".format(me))
		self.assertEqual(len(als), len(seqs))
		for (i, s) in enumerate(seqs):
			self.assertEqual(s, als[i].replace("-",''))
Esempio n. 8
0
 def test_gapped_index(self):
     """Aligning gap-free inputs must return them unchanged apart from "-".

     Builds a sequence from stats.sample_wr(translate.AAs(), 50) plus nine
     copies passed through mut() with a third argument of 0.0, aligns them
     with muscle.alignSequences, and checks that stripping "-" from each
     aligned sequence yields the corresponding input. A MuscleError from
     the aligner fails the test.
     """
     s1 = ''.join(stats.sample_wr(translate.AAs(), 50))
     # No gaps: pgap = 0.0
     others = [''.join([mut(x, 0.2, 0.0) for x in s1]) for i in range(9)]
     seqs = [s1] + others
     # Only the aligner call can raise MuscleError, so keep the try minimal
     # and report the error text instead of a bare assertTrue(False).
     try:
         als = muscle.alignSequences(seqs)
     except muscle.MuscleError as me:
         self.fail("muscle.alignSequences raised MuscleError: {}".format(me))
     self.assertEqual(len(als), len(seqs))
     for (i, s) in enumerate(seqs):
         self.assertEqual(s, als[i].replace("-", ''))
Esempio n. 9
0
def makeAlignments(ortho_dict, cdna_dicts, filter_fxn=default_filter_fxn, filter_data=None, alignment_print_fxn=default_alignment_print_fxn):
	"""Translate and align each group of orthologous ORFs.

	For every key of ortho_dict, each (species, ORF) pair is looked up as
	cdna_dicts[species][ORF] and translated with translate.translate;
	translations that come back empty are reported and dropped. Groups for
	which filter_fxn(orf, seqs, filter_data) is truthy are aligned with
	muscle.alignSequences and passed to alignment_print_fxn.

	KeyError during lookup/translation and muscle.MuscleError during
	alignment are printed as "#"-prefixed lines and the item is skipped.

	Returns:
		dict mapping each successfully aligned key of ortho_dict to the
		tuple (len(alignment), [(species, ORF), ...], alignment).
	"""
	alignments = {}
	n_aligned = 0

	for orf in ortho_dict.keys():
		# species -> (ORF name, protein) for every ortholog that translated
		translated = {}
		for (spec, sorf) in ortho_dict[orf]:
			try:
				prot = translate.translate(cdna_dicts[spec][sorf])
			except KeyError as ke:
				print("#", ke, spec, sorf, orf)
				continue
			if not prot:
				print("# protein", sorf, "did not translate")
				continue
			translated[spec] = (sorf, prot)

		species = translated.keys()
		if not filter_fxn(orf, translated, filter_data):
			continue

		prots = [translated[spec][1] for spec in species]
		try:
			# NOTE(review): the meaning of the positional 16 is not visible here.
			protal = muscle.alignSequences(prots, 16)
			hdrs = [(spec, translated[spec][0]) for spec in species]
			alignments[orf] = (len(protal), hdrs, protal)
			n_aligned += 1
			alignment_print_fxn(n_aligned, prots, protal, hdrs, orf)
		except muscle.MuscleError as me:
			print("#", me)

	return alignments
Esempio n. 10
0
	def secondField(h):
		"""Return biofile.secondField(h), or biofile.firstField(h) if that fails."""
		try:
			return biofile.secondField(h)
		except Exception:
			# Exception instead of a bare except, so KeyboardInterrupt and
			# SystemExit are no longer swallowed by the fallback.
			return biofile.firstField(h)

	# Read input
	if not os.path.isfile(options.in_fname):
		raise IOError("# Error: file {} does not exist".format(options.in_fname))
	# open() in a with-block replaces the Python-2-only file() builtin and
	# closes the handle once the FASTA records have been read.
	# NOTE(review): assumes readFASTA reads the file fully before returning -- confirm.
	with open(options.in_fname, 'r') as in_file:
		(headers, seqs) = biofile.readFASTA(in_file) #, key_fxn=biofile.secondField)
	if options.translate_sequences:
		seqs = [translate.translate(s) for s in seqs]
	if not options.dont_align_sequences:
		aligned_seqs = muscle.alignSequences(seqs)
		seqs = aligned_seqs
	# Keep only the (header, sequence) pairs whose sequence is not None.
	zhs = [(h,s) for (h,s) in zip(headers,seqs) if s is not None]
	all_keys = [biofile.firstField(h) for (h,s) in zhs]
	# NOTE(review): this unpacking raises ValueError when zhs is empty.
	(headers, seqs) = zip(*zhs)
	# firstField(header) -> sequence
	prot_dict = {biofile.firstField(h): s for (h,s) in zhs}
	# secondField(header) -> firstField(header), and the inverse mapping
	gene_orf_dict = {secondField(h): biofile.firstField(h) for h in headers}
	orf_gene_dict = {v: k for (k,v) in gene_orf_dict.items()}
	
	# Write output
	# Counter of processed keys; starts at zero.
	n_written = 0
	# Emit the literal line "header" to the output stream.
	data_outs.write("header\n")
	# As shown, the loop only counts the entries of query_keys; nothing is
	# written per key in the visible code.
	# NOTE(review): the loop body looks abridged -- confirm against the full script.
	for orf in query_keys:
		n_written += 1

	# Write out stopping time
Esempio n. 11
0
		if '_' in seq:
			store_seq = False
		if store_seq:
			new_header = '{sname} {hdr}'.format(sname=species_name, hdr=hdr)
			#named_headers[species_name] = new_header
			#named_seqs[species_name] = seq
			new_headers.append(new_header)
			new_seqs.append(seq)

	# Continue with the filtered records collected above.
	headers, seqs = new_headers, new_seqs

	# Optionally replace the sequences with their alignment.
	if options.align:
		aligned_seqs = muscle.alignSequences(seqs)
		seqs = aligned_seqs

	# Write output: one FASTA-style record (">header" line, then the sequence) per entry.
	record_template = ">{hdr}\n{seq}\n"
	n_written = 0
	for (hdr, seq) in zip(headers, seqs):
		line = record_template.format(hdr=hdr, seq=seq)
		data_outs.write(line)
		n_written += 1

	# Write out stopping time
	finish_stamp = util.timestamp()
	data_outs.write("# Run finished {}\n".format(finish_stamp))
Esempio n. 12
0
        f = None
        try:
            f = biofile.secondField(h)
        except:
            f = biofile.firstField(h)
        return f

        # Read input

    if not os.path.isfile(options.in_fname):
        raise IOError("# Error: file {} does not exist".format(options.in_fname))
    # open() in a with-block replaces the Python-2-only file() builtin and
    # closes the handle once the FASTA records have been read.
    # NOTE(review): assumes readFASTA reads the file fully before returning -- confirm.
    with open(options.in_fname, "r") as in_file:
        (headers, seqs) = biofile.readFASTA(in_file)  # , key_fxn=biofile.secondField)
    if options.translate_sequences:
        seqs = [translate.translate(s) for s in seqs]
    if not options.dont_align_sequences:
        aligned_seqs = muscle.alignSequences(seqs)
        seqs = aligned_seqs
    # Keep only the (header, sequence) pairs whose sequence is not None.
    zhs = [(h, s) for (h, s) in zip(headers, seqs) if s is not None]
    all_keys = [biofile.firstField(h) for (h, s) in zhs]
    # NOTE(review): this unpacking raises ValueError when zhs is empty.
    (headers, seqs) = zip(*zhs)
    # firstField(header) -> sequence
    prot_dict = {biofile.firstField(h): s for (h, s) in zhs}
    # secondField(header) -> firstField(header), and the inverse mapping
    gene_orf_dict = {secondField(h): biofile.firstField(h) for h in headers}
    orf_gene_dict = {v: k for (k, v) in gene_orf_dict.items()}

    # Write output
    # Counter of processed keys; starts at zero.
    n_written = 0
    # Emit the literal line "header" to the output stream.
    data_outs.write("header\n")
    # As shown, the loop only counts the entries of query_keys; nothing is
    # written per key in the visible code.
    # NOTE(review): the loop body looks abridged -- confirm against the full script.
    for orf in query_keys:
        n_written += 1

        # Write out stopping time