def get_ali_entropy_syn(alim, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the entropy of codon usage within each amino acid, by codon position

    For every requested codon position, the rows of the alignment are grouped
    by the amino acid their codon translates to. The codon counts inside each
    group are normalized to frequencies and passed to get_entropy.

    Parameters:
       - alim: alignment matrix, indexed as alim[:, start:end]; iterating over
         such a slice must yield one sequence of characters per row.
       - positions: codon (not nucleotide) indices to analyze, defaults to all
         codons of the alignment.
       - alpha: accepted but not used by this function.
       - VERBOSE: if >= 3, print every codon position while it is processed.

    Returns:
       dict: codon position -> dict: amino acid -> get_entropy(codon freqs).

    Raises:
       ValueError: if the alignment length is not a multiple of 3.
    '''
    from collections import defaultdict
    from hivwholeseq.utils.sequence import translate_with_gaps as translate

    # The length check must look at the alignment that was passed in (alim)
    n_columns = len(alim[0])
    if n_columns % 3:
        raise ValueError('The alignment length is not a multiple of 3')

    if positions is None:
        positions = np.arange(n_columns // 3)

    # The data structure is a nested dict by position and amino acid
    S = {}
    # Iterate over codons
    for pos in positions:
        if VERBOSE >= 3:
            print(pos)

        asub = alim[:, pos * 3: (pos + 1) * 3]

        # aacount[amino acid][codon] = number of rows carrying that codon
        aacount = defaultdict(lambda: defaultdict(int))
        for cod in asub:
            cod = ''.join(cod)
            aacount[translate(cod)][cod] += 1

        Spos = {}
        for aa, codd in aacount.items():
            # list() so that a dict view also becomes a 1D array of counts
            af = np.array(list(codd.values()), float)
            af /= af.sum()

            Spos[aa] = get_entropy(af)
        S[pos] = Spos

    return S
def get_ali_entropy_syn(alim, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the entropy of codon usage within each amino acid, by codon position

    For every requested codon position, the rows of the alignment are grouped
    by the amino acid their codon translates to. The codon counts inside each
    group are normalized to frequencies and passed to get_entropy.

    Parameters:
       - alim: alignment matrix, indexed as alim[:, start:end]; iterating over
         such a slice must yield one sequence of characters per row.
       - positions: codon (not nucleotide) indices to analyze, defaults to all
         codons of the alignment.
       - alpha: accepted but not used by this function.
       - VERBOSE: if >= 3, print every codon position while it is processed.

    Returns:
       dict: codon position -> dict: amino acid -> get_entropy(codon freqs).

    Raises:
       ValueError: if the alignment length is not a multiple of 3.
    '''
    from collections import defaultdict
    from hivwholeseq.utils.sequence import translate_with_gaps as translate

    # The length check must look at the alignment that was passed in (alim)
    n_columns = len(alim[0])
    if n_columns % 3:
        raise ValueError('The alignment length is not a multiple of 3')

    if positions is None:
        positions = np.arange(n_columns // 3)

    # The data structure is a nested dict by position and amino acid
    S = {}
    # Iterate over codons
    for pos in positions:
        if VERBOSE >= 3:
            print(pos)

        asub = alim[:, pos * 3:(pos + 1) * 3]

        # aacount[amino acid][codon] = number of rows carrying that codon
        aacount = defaultdict(lambda: defaultdict(int))
        for cod in asub:
            cod = ''.join(cod)
            aacount[translate(cod)][cod] += 1

        Spos = {}
        for aa, codd in aacount.items():
            # list() so that a dict view also becomes a 1D array of counts
            af = np.array(list(codd.values()), float)
            af /= af.sum()

            Spos[aa] = get_entropy(af)
        S[pos] = Spos

    return S
def get_ali_entropy(ali, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the per-column entropy of an alignment

    Every selected column is turned into a vector of symbol frequencies over
    the alphabet, and the resulting matrix is handed to get_entropy. Only
    symbols found in alpha are counted; anything else in a column is ignored.

    Parameters:
       - ali: alignment, indexed as ali[:, pos] to extract a column.
       - positions: column indices to analyze, defaults to all columns.
       - alpha: alphabet for the sequences, defaults to the first five
         symbols of the module-level alpha (ACGT-).
       - VERBOSE: accepted but not used by this function.

    Returns:
       get_entropy applied to the (len(alpha), len(positions)) matrix of
       frequencies.
    '''
    if positions is None:
        positions = np.arange(len(ali[0]))

    n_symbols = len(alpha)
    freqs = np.zeros((n_symbols, len(positions)))
    for icol, pos in enumerate(positions):
        # NOTE(review): binary-mode np.fromstring is deprecated in recent
        # NumPy releases; confirm the type of ali[:, pos] before replacing it.
        column = np.fromstring(ali[:, pos], 'S1')

        # Count the occurrences of each alphabet symbol in this column
        counts = np.zeros(n_symbols, float)
        for isym, symbol in enumerate(alpha):
            counts[isym] = (column == symbol).sum()

        # Normalize the counts to frequencies
        freqs[:, icol] = counts / counts.sum()

    return get_entropy(freqs)
# Example #4
# 0
def get_ali_entropy(ali, positions=None, alpha=alpha[:5], VERBOSE=0):
    '''Get the per-column entropy of an alignment

    Every selected column is turned into a vector of symbol frequencies over
    the alphabet, and the resulting matrix is handed to get_entropy. Only
    symbols found in alpha are counted; anything else in a column is ignored.

    Parameters:
       - ali: alignment, indexed as ali[:, pos] to extract a column.
       - positions: column indices to analyze, defaults to all columns.
       - alpha: alphabet for the sequences, defaults to the first five
         symbols of the module-level alpha (ACGT-).
       - VERBOSE: accepted but not used by this function.

    Returns:
       get_entropy applied to the (len(alpha), len(positions)) matrix of
       frequencies.
    '''
    if positions is None:
        positions = np.arange(len(ali[0]))

    n_symbols = len(alpha)
    freqs = np.zeros((n_symbols, len(positions)))
    for icol, pos in enumerate(positions):
        # NOTE(review): binary-mode np.fromstring is deprecated in recent
        # NumPy releases; confirm the type of ali[:, pos] before replacing it.
        column = np.fromstring(ali[:, pos], 'S1')

        # Count the occurrences of each alphabet symbol in this column
        counts = np.zeros(n_symbols, float)
        for isym, symbol in enumerate(alpha):
            counts[isym] = (column == symbol).sum()

        # Normalize the counts to frequencies
        freqs[:, icol] = counts / counts.sum()

    return get_entropy(freqs)