def align_sequence(muscle_exe, sequence, rif=None):
    """Diff ``sequence`` against a reference and return the SequenceDiff.

    muscle_exe -- forwarded unchanged to ``SequenceDiff.gen_diff``
                  (presumably the MUSCLE aligner executable; confirm
                  against NGclassify).
    sequence   -- object exposing ``.name`` and convertible with ``str()``;
                  the original docstring states it is a datatypes.Sequence.
    rif        -- reference sequence to diff against. When None, a
                  reference named 'RSRS' is built from ``consts.RCRS``.

    Returns the ``NGclassify.SequenceDiff`` instance after ``gen_diff``
    has been called on it.
    """
    # NOTE(review): the default reference is labelled 'RSRS' but is built
    # from consts.RCRS -- confirm that this pairing is intended.
    reference = datatypes.Sequence('RSRS', consts.RCRS) if rif is None else rif

    diff = NGclassify.SequenceDiff()
    # Re-wrap the input as a fresh Sequence built from its name and its
    # string form rather than handing the caller's object to gen_diff.
    query = datatypes.Sequence(sequence.name, str(sequence))
    diff.gen_diff(muscle_exe, reference, query)
    return diff
def load_fasta_file(fname):
    """Read a FASTA-style file into a single ``datatypes.Sequence``.

    The first line is treated as the header: its first character (the
    '>' marker in FASTA) is dropped and surrounding whitespace is stripped
    to form the sequence name. Every remaining line is stripped and
    concatenated, and the joined text is upper-cased.

    fname -- path of the file to read.

    Returns ``datatypes.Sequence(name, sequence_text)``.

    NOTE(review): only the first line is parsed as a header; any later
    header lines in a multi-record file are concatenated into the
    sequence text as-is.
    """
    # The context manager closes the handle even if reading raises; the
    # previous version left the file open.
    with open(fname, 'r') as handle:
        # Drop only the leading marker character. strip() already removes
        # the line terminator, so the final character of the name is kept
        # even when the header line has no trailing newline.
        name = handle.readline()[1:].strip()
        sequence_text = ''.join(line.strip() for line in handle)
    return datatypes.Sequence(name, sequence_text.upper())
contig_seq_diff.regions.append([contig_seq_diff.start, contig_seq_diff.end]) else: incoming_seqdiff = align_sequence(muscle_exe, contig) incoming_seqdiff.find_segment() contig_seq_diff.diff_list.extend(incoming_seqdiff.diff_list) contig_seq_diff.regions.append([incoming_seqdiff.start, incoming_seqdiff.end]) print "\nSequence haplogroup assignment\n" seq_classify = h_analysis(htrees, contig_seq_diff, contig_seq_diff.regions, mhcs_dict) seq_classify.sample_name = sample_name #print "\nSequence functional annotation\n" print "Contig alignment to MHCS and rCRS" m = list(seq_classify.mhcss)[0] print "Aligning contigs to MHCS SeqDiff object" its_mhcs = datatypes.Sequence(m, mhcs_dict[m]) #contig_mhcs_total_seqdiff = [] for x, contig in enumerate(contig_array): if x == 0: contig_mhcs_seq_diff = align_sequence(muscle_exe, contig, its_mhcs) contig_mhcs_seq_diff.find_segment() contig_mhcs_seq_diff.regions.append([contig_seq_diff.start, contig_seq_diff.end]) else: incoming_mhcs_seqdiff = align_sequence(muscle_exe, contig, its_mhcs) incoming_mhcs_seqdiff.find_segment() contig_mhcs_seq_diff.diff_list.extend(incoming_mhcs_seqdiff.diff_list) contig_mhcs_seq_diff.regions.append([incoming_mhcs_seqdiff.start, incoming_mhcs_seqdiff.end]) print "rCRS SeqDiff object" rcrs = datatypes.Sequence('rCRS', consts.rcrs) for x, contig in enumerate(contig_array):