Example #1
0
def make_old_pseknc_vector(sequence_list, lamada, w, k, phyche_value, theta_type=1):
    """Generate the pseknc vector for every sequence in ``sequence_list``.

    Each output row holds the normalized k-mer frequencies of one sequence
    followed by its weighted theta factors; every entry is divided by
    ``1 + w * sum(theta_list)``.

    Args:
        sequence_list: Iterable of sequences; each must support ``len()``.
        lamada: Passed to the theta helper; together with ``k`` it sets the
            minimum accepted sequence length (``lamada + k``).
        w: Weight applied to the theta factors.
        k: k-mer length passed to ``make_kmer_list``.
        phyche_value: Passed through unchanged to the theta helper.
        theta_type: ``1`` uses ``get_parallel_factor_psednc``; ``2`` uses
            ``get_series_factor``.

    Returns:
        A list with one list per sequence. Frequency entries are rounded to
        3 decimals, theta entries to 4 decimals.

    Raises:
        SystemExit: If a sequence is too short for ``lamada`` and ``k``. The
            message is written to stderr and the exit status is 1.
        ValueError: If ``theta_type`` is neither 1 nor 2.
    """
    kmer = make_kmer_list(k, ALPHABET)
    vector = []

    for sequence in sequence_list:
        if len(sequence) < k or lamada + k > len(sequence):
            error_info = "Sorry, the sequence length must be larger than " + str(lamada + k)
            sys.stderr.write(error_info)
            # Non-zero status: this is an error exit, not a successful one.
            sys.exit(1)

        # Get the nucleotide frequency in the DNA sequence.
        fre_list = [frequency(sequence, str(key)) for key in kmer]
        fre_sum = float(sum(fre_list))

        # Get the normalized occurrence frequency of nucleotide in the DNA sequence.
        fre_list = [e / fre_sum for e in fre_list]

        # Get the theta_list according the Equation 5.
        if 1 == theta_type:
            theta_list = get_parallel_factor_psednc(lamada, sequence, phyche_value)
        elif 2 == theta_type:
            theta_list = get_series_factor(k, lamada, sequence, phyche_value)
        else:
            # Without this branch theta_list would be unbound below and the
            # caller would only see an UnboundLocalError.
            raise ValueError(
                "Unsupported theta_type: %r (expected 1 or 2)" % (theta_type,))
        theta_sum = sum(theta_list)

        # Generate the vector according the Equation 9.
        denominator = 1 + w * theta_sum

        temp_vec = [round(f / denominator, 3) for f in fre_list]
        temp_vec.extend(round(w * theta / denominator, 4) for theta in theta_list)

        vector.append(temp_vec)

    return vector
Example #2
0
def make_pseknc_vector(sequence_list,
                       phyche_value,
                       k=2,
                       w=0.05,
                       lamada=1,
                       alphabet=index_list.DNA,
                       theta_type=1):
    """Generate the pseknc vector for every sequence in ``sequence_list``.

    Each output row holds the normalized k-mer frequencies of one sequence
    followed by its weighted theta factors; every entry is divided by
    ``1 + w * sum(theta_list)`` and rounded to 8 decimals.

    Args:
        sequence_list: Iterable of sequences; each must support ``len()``.
        phyche_value: Passed through unchanged to the theta helper.
        k: k-mer length passed to ``make_kmer_list``.
        w: Weight applied to the theta factors.
        lamada: Passed to the theta helper; together with ``k`` it sets the
            minimum accepted sequence length (``lamada + k``).
        alphabet: Alphabet passed to ``make_kmer_list`` and the theta helper.
        theta_type: ``1`` uses ``get_parallel_factor`` with the given ``k``;
            ``2`` uses ``get_series_factor``; ``3`` uses
            ``get_parallel_factor`` with ``k`` fixed to 2.

    Returns:
        A list with one list of floats per sequence.

    Raises:
        SystemExit: If a sequence is too short for ``lamada`` and ``k``. The
            message is written to stderr and the exit status is 1.
        ValueError: If ``theta_type`` is not 1, 2 or 3.
    """
    kmer = make_kmer_list(k, alphabet)
    vector = []

    for sequence in sequence_list:
        if len(sequence) < k or lamada + k > len(sequence):
            error_info = "Sorry, the sequence length must be larger than " + str(
                lamada + k)
            sys.stderr.write(error_info)
            # Non-zero status: this is an error exit, not a successful one.
            sys.exit(1)

        # Get the nucleotide frequency in the DNA sequence.
        fre_list = [frequency(sequence, str(key)) for key in kmer]
        fre_sum = float(sum(fre_list))

        # Get the normalized occurrence frequency of nucleotide in the DNA sequence.
        fre_list = [e / fre_sum for e in fre_list]

        # Get the theta_list.
        if 1 == theta_type:
            theta_list = get_parallel_factor(k, lamada, sequence, phyche_value,
                                             alphabet)
        elif 2 == theta_type:
            theta_list = get_series_factor(k, lamada, sequence, phyche_value,
                                           alphabet)
        elif 3 == theta_type:
            # Type 3 deliberately ignores the caller's k for the theta
            # factors and always uses k=2.
            theta_list = get_parallel_factor(k=2,
                                             lamada=lamada,
                                             sequence=sequence,
                                             phyche_value=phyche_value,
                                             alphabet=alphabet)
        else:
            # Without this branch theta_list would be unbound below and the
            # caller would only see an UnboundLocalError.
            raise ValueError(
                "Unsupported theta_type: %r (expected 1, 2 or 3)" % (theta_type,))
        theta_sum = sum(theta_list)

        # Generate the vector according the Equation 9.
        denominator = 1 + w * theta_sum

        temp_vec = [round(f / denominator, 8) for f in fre_list]
        temp_vec.extend(round(w * theta / denominator, 8) for theta in theta_list)

        vector.append(temp_vec)

    return vector
Example #3
0
def make_kmer_vector(k, alphabet, filename, revcomp=False):
    """Build one normalized k-mer count vector per sequence read from a file.

    The file is opened and handed to ``get_data``. For every returned
    sequence, each k-mer from ``make_kmer_list(k, alphabet)`` is counted with
    ``frequency``. With ``revcomp`` set, a k-mer and its reverse complement
    share one counter, keyed by whichever of the two compares smaller. Each
    count is divided by the sequence's total count and rounded to 8 decimals.

    Exits via ``sys.exit`` with an error message when ``revcomp`` is
    requested and the alphabet contains a character outside ``acgtACGT``.

    Returns a list of lists of floats, one inner list per sequence.
    """
    with open(filename) as handle:
        sequences = get_data(handle, alphabet=alphabet)

        # The alphabet is only inspected when reverse complements are wanted.
        if revcomp:
            if re.search(r'[^acgtACGT]', ''.join(alphabet)) is not None:
                sys.exit("Error, Only DNA sequence can be reverse compliment.")

        vector = []
        all_kmers = make_kmer_list(k, alphabet)
        for sequence in sequences:
            total = 0
            counts = {}

            # Tally every k-mer under its own key or its canonical key.
            for word in all_kmers:
                occurrences = frequency(sequence, word)
                if revcomp:
                    partner = find_revcomp(word, {})
                    key = word if word <= partner else partner
                else:
                    key = word
                counts[key] = counts.get(key, 0) + occurrences
                total += occurrences

            # Pick the output order, then turn the counts into fractions.
            if revcomp:
                ordered_keys = make_revcomp_kmer_list(all_kmers)
            else:
                ordered_keys = all_kmers
            raw_counts = [counts[key] for key in ordered_keys]
            vector.append([round(float(c) / total, 8) for c in raw_counts])

    return vector
Example #4
0
def make_kmer_vector(k, alphabet, filename, revcomp=False):
    """Return normalized k-mer frequency vectors for the sequences in a file.

    Sequences come from ``get_data`` applied to the opened file. Every k-mer
    produced by ``make_kmer_list(k, alphabet)`` is counted per sequence with
    ``frequency``. When ``revcomp`` is true, counts of a k-mer and of its
    reverse complement are merged under the smaller of the two strings, and
    the output follows the order of ``make_revcomp_kmer_list``. Counts are
    divided by the per-sequence total and rounded to 8 decimals.

    Calls ``sys.exit`` with an error message if ``revcomp`` is true and the
    alphabet holds any character other than ``acgtACGT``.
    """
    with open(filename) as source:
        seq_list = get_data(source, alphabet=alphabet)

        wants_revcomp_on_non_dna = (
            revcomp
            and re.search(r'[^acgtACGT]', ''.join(alphabet)) is not None
        )
        if wants_revcomp_on_non_dna:
            sys.exit("Error, Only DNA sequence can be reverse compliment.")

        vector = []
        kmers = make_kmer_list(k, alphabet)
        for seq in seq_list:
            grand_total = 0
            tally = {}

            # Accumulate the per-k-mer counts.
            for current in kmers:
                hits = frequency(seq, current)
                bucket = current
                if revcomp:
                    mirrored = find_revcomp(current, {})
                    if not current <= mirrored:
                        bucket = mirrored
                tally.setdefault(bucket, 0)
                tally[bucket] += hits
                grand_total += hits

            # Normalize in the order dictated by the revcomp setting.
            output_keys = make_revcomp_kmer_list(kmers) if revcomp else kmers
            collected = [tally[name] for name in output_keys]
            normalized = [round(float(value) / grand_total, 8) for value in collected]

            vector.append(normalized)

    return vector
Example #5
0
def estimations_from_predictions(predictions, annotations, tuning, config):
    """Select the highest-weighted estimation for each prediction/annotation pair.

    For every pair, candidate estimations are created by
    ``init_estimations_from_normal`` from the frequency that
    ``util.frequency`` returns for the annotated fret and the tuning entry of
    the annotated string. The n-th confidence in ``prediction`` (counting
    from 1) is then set as the weight of every candidate whose ``string``
    equals n. The winner is the first candidate whose weight is strictly
    greater than every earlier one and than the weight of a fresh
    ``make_estimation()`` object; if no candidate beats that fresh object,
    the fresh object itself is returned for the pair.

    Returns a list with one estimation per zipped pair.
    """
    chosen = []
    for prediction, annotation in zip(predictions, annotations):
        annotated_string = annotation.string
        annotated_fret = annotation.fret
        normal_freq = util.frequency(annotated_fret,
                                     tuning.tuning[annotated_string - 1])

        candidates = init_estimations_from_normal(normal_freq, tuning, config)

        # Confidences are positional: the first one belongs to string 1.
        for string_number, confidence in enumerate(prediction, 1):
            for candidate in candidates:
                if candidate.string == string_number:
                    candidate.set_weight(confidence)

        # Start from a fresh estimation and keep any strictly heavier one.
        best = make_estimation()
        for candidate in candidates:
            best = candidate if candidate.weight > best.weight else best
        chosen.append(best)
    return chosen
Example #6
0
def make_kmer_vector_ID(seq_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize):
    """Generate one k-mer count (or frequency) vector per sequence.

    ``kmer_list`` is split into consecutive groups by the boundaries returned
    from ``make_index_upto_k(k)`` (``upto`` true, ``k`` groups) or
    ``make_index(k)`` (one group). Every k-mer is counted with ``frequency``
    and a total is kept per group. With ``revcomp`` set, a k-mer and its
    reverse complement share one counter keyed by the smaller string, and the
    output follows ``rev_kmer_list`` instead of ``kmer_list``.

    When ``normalize`` is true each count is divided by the total of its
    group and rounded to 3 decimals; otherwise raw counts are returned.

    Returns a list of lists, one inner list per sequence.
    """
    # Group boundaries inside kmer_list and the number of groups.
    if upto:
        bounds = make_index_upto_k(k)
        group_count = k
    else:
        bounds = make_index(k)
        group_count = 1
    totals = [0] * group_count

    vector = []
    for seq in seq_list:
        counts = {}

        # Count every k-mer, group by group.
        for group in range(group_count):
            totals[group] = 0
            for position in range(bounds[group], bounds[group + 1]):
                word = kmer_list[position]
                occurrences = frequency(seq, word)
                if revcomp:
                    partner = find_revcomp(word, {})
                    key = word if word <= partner else partner
                else:
                    key = word
                counts[key] = counts.get(key, 0) + occurrences
                totals[group] += occurrences

        # Lay the counts out in the requested order.
        ordered_keys = rev_kmer_list if revcomp else kmer_list
        row = [counts[key] for key in ordered_keys]

        if normalize:
            if upto:
                if revcomp:
                    limits = make_index_upto_k_revcomp(k)
                else:
                    limits = make_index_upto_k(k)
                # Walk the row once, moving to the next group's total each
                # time the position passes a group boundary.
                group = 0
                for position, count in enumerate(row):
                    if position >= limits[group + 1]:
                        group += 1
                    row[position] = round(float(count) / totals[group], 3)
            else:
                row = [round(float(count) / totals[0], 3) for count in row]

        vector.append(row)
    return vector
Example #7
0
# Script: feeds ten (alpha, beta, theta, delta) samples one at a time into a
# util.frequency object and renders a binary.bin_image after each sample.
#
# If not defined, warp[1] is set to None.

# Four parallel series with ten entries each; entry n of every list is passed
# together to one store_signal() call below.
# NOTE(review): the names suggest per-band signal values -- confirm the
# meaning and units with the data source.
# NOTE(review): entry 7 is exactly 200 in alpha, beta and theta (but not in
# delta); this looks like a placeholder or outlier -- TODO confirm.
alpha = [
    8.037098164766618, 9.2494445061479, 11.983630862563617, 8.418011746246092,
    10.100367601431355, 9.271272689547438, 8.361221354294603, 200,
    9.109799699070852, 9.919370757465913
]
beta = [
    27.971242258713225, 13.895897768035837, 29.979076350711615,
    18.23346507541101, 17.20632120434984, 14.5832818156608, 16.282666801491864,
    200, 17.01010654043829, 14.973403850861265
]
theta = [
    7.332827665540709, 7.693271996617378, 6.409582916757133, 6.925888943127349,
    5.755089466047173, 7.137183585824163, 7.4507863470554625, 200,
    5.113392670335407, 6.688337149006028
]
delta = [
    1.6747542908272948, 0.792628712689756, 0.9962083257186725,
    1.1048836673314748, 1.23381736706717, 1.025315344646628,
    1.2172809260762698, 1.542627872299756, 1.2572786093425488,
    1.1197264326339946
]

# The same frequency object is handed to the image builder and then updated in
# the loop, so each create_image() call runs after the latest store_signal().
f = util.frequency()
# NOTE(review): the meaning of (50, 50), '0123' and 3 is defined by
# binary.bin_image and is not visible here -- presumably image size, a symbol
# set and a depth/level count; verify against that class.
im = binary.bin_image((50, 50), f, '0123', 3, warp=[None, None])
for i, j, k, r in zip(alpha, beta, theta,
                      delta):  # for each time point in the data
    f.store_signal(i, j, k, r)
    # x = f.store_signal(i,j,k,r)
    # Only the image from the final iteration remains bound to `image`.
    image = im.create_image()
Example #8
0
def make_kmer_vector(seq_list, kmer_list, rev_kmer_list, k, upto, revcomp, normalize):
    """Generate one k-mer count (or frequency) vector per sequence.

    The k-mers in ``kmer_list`` are processed in consecutive groups whose
    boundaries come from ``make_index_upto_k(k)`` when ``upto`` is true
    (``k`` groups) or from ``make_index(k)`` otherwise (a single group).
    Counts come from ``frequency``; a running total is kept for each group.
    If ``revcomp`` is true, a k-mer and its reverse complement are merged
    under the smaller of the two strings and the result is ordered by
    ``rev_kmer_list``; otherwise it is ordered by ``kmer_list``.

    If ``normalize`` is true, every count is divided by its group's total and
    rounded to 3 decimals.

    Returns a list containing one list per sequence.
    """
    # Select the group boundaries and how many groups there are.
    if upto:
        boundaries = make_index_upto_k(k)
        n_groups = k
    else:
        boundaries = make_index(k)
        n_groups = 1
    group_totals = [0] * n_groups

    vector = []
    for seq in seq_list:
        tally = {}

        # Fill the tally for this sequence, one group at a time.
        for g in range(n_groups):
            group_totals[g] = 0
            for idx in range(boundaries[g], boundaries[g + 1]):
                current = kmer_list[idx]
                hits = frequency(seq, current)
                bucket = current
                if revcomp:
                    mirrored = find_revcomp(current, {})
                    if not current <= mirrored:
                        bucket = mirrored
                tally.setdefault(bucket, 0)
                tally[bucket] += hits
                group_totals[g] += hits

        # Emit the counts in the order of the relevant k-mer list.
        if revcomp:
            values = [tally[name] for name in rev_kmer_list]
        else:
            values = [tally[name] for name in kmer_list]

        if normalize and not upto:
            values = [round(float(v) / group_totals[0], 3) for v in values]
        elif normalize:
            limits = make_index_upto_k_revcomp(k) if revcomp else make_index_upto_k(k)
            # Advance to the next group's total whenever the position
            # reaches that group's starting boundary.
            g = 0
            for idx, v in enumerate(values):
                if idx >= limits[g + 1]:
                    g += 1
                values[idx] = round(float(v) / group_totals[g], 3)

        vector.append(values)
    return vector