def make_old_pseknc_vector(sequence_list, lamada, w, k, phyche_value, theta_type=1):
    """Generate the pseknc vector for every sequence in ``sequence_list``.

    Each output row contains the normalized k-mer frequencies (one entry per
    k-mer returned by ``make_kmer_list(k, ALPHABET)``) followed by the
    weighted theta factors.  Every entry is divided by
    ``1 + w * sum(theta_list)``; frequency entries are rounded to 3 decimals
    and theta entries to 4 decimals.

    Args:
        sequence_list: Iterable of sequences; each must support ``len``.
        lamada: Forwarded to the theta helper.  ``lamada + k`` must not
            exceed the length of any sequence.
        w: Weight applied to the theta factors.
        k: Length of the k-mers whose frequencies are computed.
        phyche_value: Forwarded unchanged to the theta helper.
        theta_type: Selects the theta helper:
            1 -> ``get_parallel_factor_psednc(lamada, sequence, phyche_value)``
            2 -> ``get_series_factor(k, lamada, sequence, phyche_value)``

    Returns:
        A list with one list of floats per input sequence.

    Raises:
        ValueError: If ``theta_type`` is neither 1 nor 2.
        SystemExit: If a sequence is shorter than ``k`` or than
            ``lamada + k`` (a message is written to stderr first).
    """
    kmer = make_kmer_list(k, ALPHABET)
    vector = []

    for sequence in sequence_list:
        if len(sequence) < k or lamada + k > len(sequence):
            error_info = "Sorry, the sequence length must be larger than " + str(lamada + k)
            sys.stderr.write(error_info)
            # NOTE(review): this error path exits with status 0; kept as-is
            # because existing callers may depend on it -- confirm.
            sys.exit(0)

        # Get the nucleotide frequency in the DNA sequence.
        fre_list = [frequency(sequence, str(key)) for key in kmer]
        fre_sum = float(sum(fre_list))

        # Get the normalized occurrence frequency of nucleotide in the DNA sequence.
        fre_list = [e / fre_sum for e in fre_list]

        # Get the theta_list according the Equation 5.
        if 1 == theta_type:
            theta_list = get_parallel_factor_psednc(lamada, sequence, phyche_value)
        elif 2 == theta_type:
            theta_list = get_series_factor(k, lamada, sequence, phyche_value)
        else:
            # Previously theta_list stayed unbound here and the function
            # failed with an UnboundLocalError on the next statement.
            raise ValueError(
                "Unsupported theta_type: %r (expected 1 or 2)" % (theta_type,))
        theta_sum = sum(theta_list)

        # Generate the vector according the Equation 9.
        denominator = 1 + w * theta_sum

        temp_vec = [round(f / denominator, 3) for f in fre_list]
        temp_vec.extend(round(w * theta / denominator, 4) for theta in theta_list)

        vector.append(temp_vec)

    return vector
def make_pseknc_vector(sequence_list, phyche_value, k=2, w=0.05, lamada=1, alphabet=index_list.DNA, theta_type=1):
    """Generate the pseknc vector for every sequence in ``sequence_list``.

    Each output row contains the normalized k-mer frequencies (one entry per
    k-mer returned by ``make_kmer_list(k, alphabet)``) followed by the
    weighted theta factors.  Every entry is divided by
    ``1 + w * sum(theta_list)`` and rounded to 8 decimals.

    Args:
        sequence_list: Iterable of sequences; each must support ``len``.
        phyche_value: Forwarded unchanged to the theta helper.
        k: Length of the k-mers whose frequencies are computed.
        w: Weight applied to the theta factors.
        lamada: Forwarded to the theta helper.  ``lamada + k`` must not
            exceed the length of any sequence.
        alphabet: Alphabet handed to ``make_kmer_list`` and the theta helper.
        theta_type: Selects the theta helper:
            1 -> ``get_parallel_factor`` called with the given ``k``
            2 -> ``get_series_factor`` called with the given ``k``
            3 -> ``get_parallel_factor`` called with ``k`` fixed to 2

    Returns:
        A list with one list of floats per input sequence.

    Raises:
        ValueError: If ``theta_type`` is not 1, 2 or 3.
        SystemExit: If a sequence is shorter than ``k`` or than
            ``lamada + k`` (a message is written to stderr first).
    """
    kmer = make_kmer_list(k, alphabet)
    vector = []

    for sequence in sequence_list:
        if len(sequence) < k or lamada + k > len(sequence):
            error_info = "Sorry, the sequence length must be larger than " + str(lamada + k)
            sys.stderr.write(error_info)
            # NOTE(review): this error path exits with status 0; kept as-is
            # because existing callers may depend on it -- confirm.
            sys.exit(0)

        # Get the nucleotide frequency in the DNA sequence.
        fre_list = [frequency(sequence, str(key)) for key in kmer]
        fre_sum = float(sum(fre_list))

        # Get the normalized occurrence frequency of nucleotide in the DNA sequence.
        fre_list = [e / fre_sum for e in fre_list]

        # Get the theta_list.
        if 1 == theta_type:
            theta_list = get_parallel_factor(k, lamada, sequence, phyche_value, alphabet)
        elif 2 == theta_type:
            theta_list = get_series_factor(k, lamada, sequence, phyche_value, alphabet)
        elif 3 == theta_type:
            theta_list = get_parallel_factor(k=2, lamada=lamada, sequence=sequence,
                                             phyche_value=phyche_value, alphabet=alphabet)
        else:
            # Previously theta_list stayed unbound here and the function
            # failed with an UnboundLocalError on the next statement.
            raise ValueError(
                "Unsupported theta_type: %r (expected 1, 2 or 3)" % (theta_type,))
        theta_sum = sum(theta_list)

        # Generate the vector according the Equation 9.
        denominator = 1 + w * theta_sum

        temp_vec = [round(f / denominator, 8) for f in fre_list]
        temp_vec.extend(round(w * theta / denominator, 8) for theta in theta_list)

        vector.append(temp_vec)

    return vector
def make_kmer_vector(k, alphabet, filename, revcomp=False):
    """Build one normalized k-mer count vector per sequence read from a file.

    The sequences come from ``get_data`` applied to the opened ``filename``.
    For every sequence each k-mer from ``make_kmer_list(k, alphabet)`` is
    counted with ``frequency``; the counts are then divided by the total
    count of that sequence and rounded to 8 decimals.

    Args:
        k: Length of the k-mers.
        alphabet: Iterable of letters; joined into one string for the DNA
            check and passed to ``get_data`` / ``make_kmer_list``.
        filename: Path of the file to read.
        revcomp: When true, each k-mer is counted under whichever of itself
            and ``find_revcomp(kmer, {})`` compares smaller, and the row
            follows the order of ``make_revcomp_kmer_list``.

    Returns:
        A list of rows (lists of floats), one per sequence.

    Raises:
        SystemExit: If ``revcomp`` is requested and ``alphabet`` contains a
            character other than a/c/g/t (either case).
        ZeroDivisionError: If the counts of a sequence sum to zero.
    """
    with open(filename) as handle:
        sequences = get_data(handle, alphabet=alphabet)

        # Reverse complements are only accepted for the DNA letters.
        if revcomp and re.search(r'[^acgtACGT]', ''.join(alphabet)) is not None:
            sys.exit("Error, Only DNA sequence can be reverse compliment.")

        vector = []
        all_kmers = make_kmer_list(k, alphabet)

        # Processing stays inside the ``with`` block so the file is still
        # open while the sequences are consumed.
        for seq in sequences:
            total = 0
            counts = {}

            # Accumulate the count of every k-mer under its bucket key.
            for kmer in all_kmers:
                occurrences = frequency(seq, kmer)
                if revcomp:
                    partner = find_revcomp(kmer, {})
                    bucket = kmer if kmer <= partner else partner
                else:
                    bucket = kmer
                counts[bucket] = counts.get(bucket, 0) + occurrences
                total += occurrences

            # Pick the output order, then normalize by the total count.
            if revcomp:
                ordered_keys = make_revcomp_kmer_list(all_kmers)
            else:
                ordered_keys = all_kmers
            raw_counts = [counts[key] for key in ordered_keys]
            vector.append([round(float(c) / total, 8) for c in raw_counts])

    return vector
def make_kmer_vector(k, alphabet, filename, revcomp=False):
    """Return normalized k-mer count rows for the sequences in ``filename``.

    The file is opened and handed to ``get_data``.  Every k-mer produced by
    ``make_kmer_list(k, alphabet)`` is counted in each sequence via
    ``frequency``; each row is the list of counts divided by the sequence's
    total count, rounded to 8 decimals.

    Args:
        k: Length of the k-mers.
        alphabet: Iterable of letters; joined into one string for the DNA
            check and passed to ``get_data`` / ``make_kmer_list``.
        filename: Path of the file to read.
        revcomp: When true, a k-mer and ``find_revcomp(kmer, {})`` share one
            bucket (the smaller of the two) and the row is ordered by
            ``make_revcomp_kmer_list``.

    Returns:
        A list of rows (lists of floats), one per sequence.

    Raises:
        SystemExit: If ``revcomp`` is requested and ``alphabet`` contains a
            character other than a/c/g/t (either case).
        ZeroDivisionError: If the counts of a sequence sum to zero.
    """
    with open(filename) as source:
        seq_list = get_data(source, alphabet=alphabet)

        # Reverse complements are only accepted for the DNA letters.
        non_dna = re.search(r'[^acgtACGT]', ''.join(alphabet)) if revcomp else None
        if non_dna is not None:
            sys.exit("Error, Only DNA sequence can be reverse compliment.")

        vector = []
        kmers = make_kmer_list(k, alphabet)

        # The loop runs inside the ``with`` block so the file remains open
        # while the sequences are consumed.
        for seq in seq_list:
            grand_total = 0
            tally = {}

            for kmer in kmers:
                hits = frequency(seq, kmer)
                slot = kmer
                if revcomp:
                    mirrored = find_revcomp(kmer, {})
                    if not kmer <= mirrored:
                        slot = mirrored
                tally[slot] = tally.get(slot, 0) + hits
                grand_total += hits

            # Choose the column order before normalizing.
            columns = make_revcomp_kmer_list(kmers) if revcomp else kmers
            row = [tally[name] for name in columns]
            row = [round(float(value) / grand_total, 8) for value in row]
            vector.append(row)

    return vector
def estimations_from_predictions(predictions, annotations, tuning, config):
    """Pick the highest-weighted estimation for each prediction.

    Predictions and annotations are walked in lockstep.  For each pair, a set
    of candidate estimations is created from the annotated note's frequency,
    the prediction's confidences are written onto the candidates as weights
    (the n-th confidence goes to candidates whose ``string`` equals n,
    counting from 1), and the candidate with the largest weight is kept.

    Args:
        predictions: Iterable of per-string confidence sequences.
        annotations: Iterable of objects exposing ``string`` and ``fret``.
        tuning: Object whose ``tuning`` attribute is indexed by
            ``string - 1``; also forwarded to
            ``init_estimations_from_normal``.
        config: Forwarded to ``init_estimations_from_normal``.

    Returns:
        A list with one estimation per (prediction, annotation) pair.  When
        no candidate has a weight strictly greater than that of the object
        returned by ``make_estimation()``, that object itself is returned
        for the pair.
    """
    chosen = []
    for prediction, annotation in zip(predictions, annotations):
        annotated_string = annotation.string
        annotated_fret = annotation.fret
        normal_freq = util.frequency(annotated_fret, tuning.tuning[annotated_string - 1])
        candidates = init_estimations_from_normal(normal_freq, tuning, config)

        # Confidences are ordered by string number, starting at string 1.
        for string_number, confidence in enumerate(prediction, 1):
            for candidate in candidates:
                if candidate.string == string_number:
                    candidate.set_weight(confidence)

        # Start from a placeholder (presumably weight 0, per the original
        # comment) and keep the first candidate that strictly beats the
        # current best.
        best = make_estimation()
        for candidate in candidates:
            if candidate.weight > best.weight:
                best = candidate
        chosen.append(best)

    return chosen
def make_kmer_vector_ID(seq_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize):
    """Generate one k-mer count vector per sequence.

    ``kmer_list`` is processed in groups delimited by an index table:
    ``make_index_upto_k(k)`` with ``k`` groups when ``upto`` is true,
    otherwise ``make_index(k)`` with a single group.  A running total of
    counts is kept per group and used as the divisor when normalizing.

    Args:
        seq_list: Iterable of sequences.
        kmer_list: Indexable list of k-mers, addressed through the index table.
        rev_kmer_list: K-mers that define the output order when ``revcomp``
            is true.
        k: K-mer length (or the largest length when ``upto`` is true).
        upto: When true, use one group per entry of the upto-k index table.
        revcomp: When true, each k-mer is counted under whichever of itself
            and ``find_revcomp(kmer, {})`` compares smaller.
        normalize: When true, divide each count by its group's total and
            round to 3 decimals.

    Returns:
        A list of rows, one per sequence; raw counts, or floats when
        ``normalize`` is true.
    """
    # Boundaries of each group inside kmer_list, plus one total per group.
    if upto:
        index = make_index_upto_k(k)
        totals = [0] * k
        group_count = k
    else:
        index = make_index(k)
        totals = [0]
        group_count = 1

    vector = []
    for seq in seq_list:
        counts = {}

        # Count every k-mer, group by group.
        for group in range(group_count):
            totals[group] = 0
            for position in range(index[group], index[group + 1]):
                kmer = kmer_list[position]
                occurrences = frequency(seq, kmer)
                if revcomp:
                    partner = find_revcomp(kmer, {})
                    bucket = kmer if kmer <= partner else partner
                else:
                    bucket = kmer
                counts[bucket] = counts.get(bucket, 0) + occurrences
                totals[group] += occurrences

        # Lay the counts out in the requested order.
        ordered_keys = rev_kmer_list if revcomp else kmer_list
        row = [counts[key] for key in ordered_keys]

        if normalize:
            if not upto:
                row = [round(float(count) / totals[0], 3) for count in row]
            if upto:
                if revcomp:
                    boundaries = make_index_upto_k_revcomp(k)
                else:
                    boundaries = make_index_upto_k(k)
                # Move to the next group's total once the position crosses
                # that group's upper boundary.
                group = 0
                for position, count in enumerate(row):
                    if position >= boundaries[group + 1]:
                        group += 1
                    row[position] = round(float(count) / totals[group], 3)

        vector.append(row)

    return vector
# If warp[1] is not defined it is set to None.

# Four parallel series, one value per time point.
alpha = [
    8.037098164766618,
    9.2494445061479,
    11.983630862563617,
    8.418011746246092,
    10.100367601431355,
    9.271272689547438,
    8.361221354294603,
    200,
    9.109799699070852,
    9.919370757465913
]
beta = [
    27.971242258713225,
    13.895897768035837,
    29.979076350711615,
    18.23346507541101,
    17.20632120434984,
    14.5832818156608,
    16.282666801491864,
    200,
    17.01010654043829,
    14.973403850861265
]
theta = [
    7.332827665540709,
    7.693271996617378,
    6.409582916757133,
    6.925888943127349,
    5.755089466047173,
    7.137183585824163,
    7.4507863470554625,
    200,
    5.113392670335407,
    6.688337149006028
]
delta = [
    1.6747542908272948,
    0.792628712689756,
    0.9962083257186725,
    1.1048836673314748,
    1.23381736706717,
    1.025315344646628,
    1.2172809260762698,
    1.542627872299756,
    1.2572786093425488,
    1.1197264326339946
]

f = util.frequency()
im = binary.bin_image((50, 50), f, '0123', 3, warp=[None, None])

# Store one (alpha, beta, theta, delta) sample for each time point in the data.
for i, j, k, r in zip(alpha, beta, theta, delta):
    f.store_signal(i, j, k, r)

# NOTE(review): assumed to run once after all samples are stored -- confirm
# it was not meant to run inside the loop.
image = im.create_image()
def make_kmer_vector(seq_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize):
    """Generate one k-mer count vector per sequence.

    ``kmer_list`` is walked in groups given by an index table:
    ``make_index_upto_k(k)`` with ``k`` groups when ``upto`` is true,
    otherwise ``make_index(k)`` with a single group.  The total count of
    each group is tracked and serves as the divisor during normalization.

    Args:
        seq_list: Iterable of sequences.
        kmer_list: Indexable list of k-mers, addressed through the index table.
        rev_kmer_list: K-mers that define the output order when ``revcomp``
            is true.
        k: K-mer length (or the largest length when ``upto`` is true).
        upto: When true, use one group per entry of the upto-k index table.
        revcomp: When true, a k-mer and ``find_revcomp(kmer, {})`` share one
            bucket, keyed by the smaller of the two.
        normalize: When true, divide each count by its group's total and
            round to 3 decimals.

    Returns:
        A list of rows, one per sequence; raw counts, or floats when
        ``normalize`` is true.
    """
    # Group boundaries inside kmer_list and one running total per group.
    if upto:
        index = make_index_upto_k(k)
        group_totals = [0] * k
        n_groups = k
    else:
        index = make_index(k)
        group_totals = [0]
        n_groups = 1

    vector = []
    for seq in seq_list:
        tally = {}

        # Generate the kmer frequency vector, one group at a time.
        for g in range(n_groups):
            group_totals[g] = 0
            for slot in range(index[g], index[g + 1]):
                kmer = kmer_list[slot]
                hits = frequency(seq, kmer)
                key = kmer
                if revcomp:
                    mirrored = find_revcomp(kmer, {})
                    if not kmer <= mirrored:
                        key = mirrored
                tally[key] = tally.get(key, 0) + hits
                group_totals[g] += hits

        # Store the kmer frequency vector in the requested order.
        if revcomp:
            row = [tally[name] for name in rev_kmer_list]
        else:
            row = [tally[name] for name in kmer_list]

        # Normalize.
        if normalize:
            if not upto:
                row = [round(float(value) / group_totals[0], 3) for value in row]
            if upto:
                if revcomp:
                    limits = make_index_upto_k_revcomp(k)
                else:
                    limits = make_index_upto_k(k)
                # Switch to the next group's total when the column index
                # reaches that group's upper limit.
                g = 0
                for column, value in enumerate(row):
                    if column >= limits[g + 1]:
                        g += 1
                    row[column] = round(float(value) / group_totals[g], 3)

        vector.append(row)

    return vector