def main():
    '''Replace the '*' placeholder in each promoter with random DNA.

    Reads FASTA-style records from 'promoters.txt': a line starting with
    '>' opens a record and supplies its id; every other line is stripped,
    upper-cased and appended to the current record's sequence.

    For every sequence containing a '*', the placeholder is substituted
    with the result of sequence_utils.get_random_dna(rand_len,
    max_repeat_nucl, invalid_patterns), and the substitution is redrawn
    until _unacceptable() no longer rejects the candidate. Sequences
    without a placeholder are written out unchanged.

    The resulting records are written to 'promoters_update.txt'.
    '''
    rand_len = 300
    max_repeat_nucl = 3
    invalid_patterns = ['AGATCT',
                        'CACCTGC',
                        'CTCGAG',
                        'GAATTC',
                        'GAGTC([AGCT]{5})',
                        'GGATCC',
                        'GGTCTC']

    seqs = {}
    seq_id = ''
    seq = ''

    with open('promoters.txt') as fle:
        # splitlines() copes with '\r', '\n' and '\r\n' terminators alike;
        # splitting on '\r' alone mis-parses anything but bare-CR files.
        for line in fle.read().splitlines():
            if line.startswith('>'):
                # A new header closes the previous record (records whose
                # sequence is still empty are dropped, as before).
                if seq:
                    seqs[seq_id] = seq

                seq_id = line[1:].strip()
                seq = ''
            else:
                seq += line.strip().upper()

    # Store the trailing record, but do not invent an empty, nameless
    # record when the input held nothing at all.
    if seq_id or seq:
        seqs[seq_id] = seq

    # Iterate over a snapshot so that assigning back into the dict is safe.
    for seq_id, seq in list(seqs.items()):
        start = seq.find('*')

        if start < 0:
            # No placeholder: nothing to randomise. Retrying could never
            # change the sequence, so an "unacceptable" verdict would
            # otherwise loop forever.
            continue

        # Positions the random insert will occupy in the updated sequence.
        rand_range = range(start, start + rand_len)

        while True:
            update_seq = seq.replace(
                '*',
                sequence_utils.get_random_dna(rand_len,
                                              max_repeat_nucl,
                                              invalid_patterns))

            if not _unacceptable(update_seq, rand_range):
                break

        seqs[seq_id] = update_seq

    with open('promoters_update.txt', 'w') as fle:
        for seq_id, seq in seqs.items():
            fle.write('>' + seq_id + '\n')
            fle.write(seq + '\n')
def get_melt_temp(length=35, max_repeat_nuc=5, num=100):
    '''Collect melting temperatures for pairs of random DNA sequences.

    Generates `num` sequences via
    sequence_utils.get_random_dna(length, max_repeat_nuc) and calls
    sequence_utils.get_melting_temp(seq, dna2, strict=False) for every
    unordered pair of them, gathering the results in a list.

    A ZeroDivisionError raised for a pair is printed and that pair is
    skipped; the remaining pairs are still evaluated.
    '''
    melt_tmps = []
    seqs = [sequence_utils.get_random_dna(length, max_repeat_nuc)
            for _ in range(num)]

    for idx, seq in enumerate(seqs):
        # Pair each sequence only with the ones after it, so every
        # unordered pair is evaluated exactly once.
        for dna2 in seqs[idx + 1:]:
            # Guard each pair separately: wrapping the whole row in one
            # try would discard all of its results on a single failure.
            try:
                melt_tmps.append(
                    sequence_utils.get_melting_temp(seq, dna2, strict=False))
            except ZeroDivisionError as err:
                # Take no action beyond reporting the failure.
                print(err)

    # NOTE(review): melt_tmps is not returned or otherwise used within the
    # visible part of this function - confirm whether a return is intended.