Example #1
0
def get_psedpc_matrix(filename, n, r, w, pattern_list=None):
    """Build one feature vector per record of an RNA sequence/structure file.

    NOTE(review): the original docstring names the method "PseSSC" while the
    function and its helper are named "psedpc" -- confirm which is intended.

    The FASTA format of the input file is as follows:
        >sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of input file.
    :param n: The maximum distance between structure statuses.
    :param r: The highest counted rank (or tier) of the structural
        correlation along an RNA chain.
    :param w: The weight of theta, from 0.1 to 1.
    :param pattern_list: Structure statuses. When omitted (or None) it
        defaults to ['A', 'C', 'G', 'U', 'A-U', 'U-A', 'G-C', 'C-G',
        'G-U', 'U-G'].
    :return: List with one result of get_psedpc_vector per record, in the
        order the records are yielded by get_rnasc_data.
    """
    # A list literal used as a default value is created once and shared by
    # every call; build a fresh list per call so nothing downstream can leak
    # modifications from one call into the next.
    if pattern_list is None:
        pattern_list = [
            'A', 'C', 'G', 'U', 'A-U', 'U-A', 'G-C', 'C-G', 'G-U', 'U-G',
        ]
    with open(filename) as f:
        seqsslst = get_rnasc_data(f)
    return [
        get_psedpc_vector(seqss, n, r, w, pattern_list) for seqss in seqsslst
    ]
Example #2
0
def get_psedpc_matrix(filename, n, r, w, pattern_list=None):
    """Generate the feature vectors for every record of an input file.

    NOTE(review): the original docstring calls this process "PseSSC", yet the
    function and the helper it calls are named "psedpc" -- confirm the name.

    The FASTA format of the input file is as follows:
        >sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of input file.
    :param n: The maximum distance between structure statuses.
    :param r: The highest counted rank (or tier) of the structural
        correlation along an RNA chain.
    :param w: The weight of theta, from 0.1 to 1.
    :param pattern_list: Structure statuses. None (the default) selects
        ['A', 'C', 'G', 'U', 'A-U', 'U-A', 'G-C', 'C-G', 'G-U', 'U-G'].
    :return: List of the vectors returned by get_psedpc_vector, one per
        record produced by get_rnasc_data.
    """
    # Avoid a mutable default argument: a default list would be a single
    # object shared across all calls, so create a new one on each call.
    if pattern_list is None:
        pattern_list = ['A', 'C', 'G', 'U',
                        'A-U', 'U-A', 'G-C', 'C-G', 'G-U', 'U-G']
    with open(filename) as f:
        seqsslst = get_rnasc_data(f)
    features = [get_psedpc_vector(seqss, n, r, w, pattern_list)
                for seqss in seqsslst]
    return features
Example #3
0
def get_triplet_matrix(filename):
    """Produce the triplet feature vectors for every record of an input file.

    Expected layout of each record in the input file:
        >Sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of the input file.
    :return: List with one result of get_triplet_vector per record, in the
        order the records are yielded by get_rnasc_data.
    """
    # Keep 'AGCU' in exactly this order: changing it changes the order of
    # the generated features.
    nucleotides = 'AGCU'
    structure_symbols = ["(", "."]
    with open(filename) as handle:
        records = get_rnasc_data(handle)
    lookup = get_triplet_dict(structure_symbols, 3, nucleotides)
    return [get_triplet_vector(record, lookup) for record in records]
Example #4
0
def get_triplet_matrix(filename):
    """Run the whole triplet pipeline on a file and collect the vectors.

    Each record of the input file is laid out as:
        >Sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of the input file.
    :return: List of the vectors returned by get_triplet_vector, one per
        record produced by get_rnasc_data.
    """
    with open(filename) as source:
        parsed = get_rnasc_data(source)
    # The base order 'AGCU' is significant: reordering it reorders the
    # features, so it must stay as written.
    table = get_triplet_dict(["(", "."], 3, 'AGCU')
    matrix = [get_triplet_vector(entry, table) for entry in parsed]
    return matrix
Example #5
0
def get_pseknc_matrix(filename, k):
    """Produce the PseKNC feature matrix for every record of an input file.

    Expected layout of each record in the input file:
        >Sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of the input file.
    :param k: Forwarded unchanged to get_pseknc_dict and get_pseknc_vector
        (presumably the k-mer length -- confirm against those helpers).
    :return: List with one result of get_pseknc_vector per record, in the
        order the records are yielded by get_rnasc_data.
    """
    bases = list('ACGU')
    with open(filename) as handle:
        records = get_rnasc_data(handle)
    lookup = get_pseknc_dict(bases, k)
    return [get_pseknc_vector(record, lookup, k) for record in records]
Example #6
0
def get_pseknc_matrix(filename, k):
    """Run the whole PseKNC pipeline on a file and collect the vectors.

    Each record of the input file is laid out as:
        >Sequence name
        An RNA sequence consisting of the letters A, G, C, U
        Secondary structure

    :param filename: Name of the input file.
    :param k: Passed as-is to get_pseknc_dict and get_pseknc_vector
        (presumably the k-mer length -- confirm against those helpers).
    :return: List of the vectors returned by get_pseknc_vector, one per
        record produced by get_rnasc_data.
    """
    with open(filename) as source:
        parsed = get_rnasc_data(source)
    symbols = ['A', 'C', 'G', 'U']
    table = get_pseknc_dict(symbols, k)
    matrix = [get_pseknc_vector(entry, table, k) for entry in parsed]
    return matrix