Example #1
0
def safe_log(a):
    """Returns the log (base 2) of each nonzero item in a.

    a: Numeric array

    WARNING: log2 is only defined on positive numbers, so make sure
    there are no negative numbers in the array.

    Always returns an array with floats in there to avoid unexpected
    results when applying it to an array with just integers.
    """
    c = array(a.copy(),Float64)
    flat = ravel(c)
    nz_i = nonzero(flat)
    nz_e = take(flat,nz_i)
    log_nz = log2(nz_e)
    put(flat,nz_i,log_nz)
    return c
Example #2
0
def pairs_to_array(pairs, num_items=None, transform=None):
    """Returns array with same data as pairs (list of tuples).

    pairs can contain (first, second, weight) or (first, second) tuples.
    If 2 items in the tuple, weight will be assumed to be 1.

    num_items should contain the number of items that the pairs are chosen
    from. If None, will calculate based on the largest item in the actual
    list.

    transform contains a array that maps indices in the pairs coordinates
    to other indices, i.e. transform[old_index] = new_index. It is
    anticipated that transform will be the result of calling ungapped_to_gapped
    on the original, gapped sequence before the sequence is passed into
    something that strips out the gaps (e.g. for motif finding or RNA folding).

    WARNING: all tuples must be the same length! (i.e. if weight is supplied
    for any, it must be supplied for all.

    WARNING: if num_items is actually smaller than the biggest index in the
    list (+ 1, because the indices start with 0), you'll get an exception
    when trying to place the object. Don't do it.
    """
    #handle easy case
    if not pairs:
        return array([])
    data = array(pairs)
    #figure out if we're mapping the indices to gapped coordinates
    if transform:
        #pairs of indices
        idx_pairs = take(transform, data[:,0:2].astype(Int32))    
    else:
        idx_pairs = data[:,0:2].astype(Int32)
    #figure out biggest item if not supplied
    if num_items is None:
        num_items = int(max(ravel(idx_pairs))) + 1
    #make result array
    result = zeros((num_items,num_items), Float64)
    if len(data[0]) == 2:
        values = 1
    else:
        values = data[:,2]
    put(ravel(result), idx_pairs[:,0]*num_items+idx_pairs[:,1], values)
    return result
Example #3
0
def classify(struct, verbose=False):
    """Classifies a Vienna-format string into structural categories.
    
    struct may be a string or a real Vienna structure. It is tested on
    validity, because classifying invalid structures is very unreliable.
    If the number of closing brackets is larger than the number of opening
    brackets, an error would be raised, but if it's the other way around, 
    weird characters could be added to the string. For instance, classifying
    '.((.)' would give "\x00SSLS". This happens because the ends are not
    handled correctly if the stack is not back to its one-level state.
    """
    
    #Test whether structure is valid. Classifying invalid structures
    #is very unreliable.
    try:
        Vienna(struct)
    except IndexError:
        raise IndexError, "Trying to classify an invalid Vienna structure: %s"\
        %(struct)
    
    MAX_STEMS=1000

    #implement stack as three-item list
    PARENT = 0
    ITEMS = 1
    DEGREE = 2

    STEM, LOOP, BULGE, JUNCTION, END, FLEXIBLE = map(ord, 'SLBJEF')
    #WARNING: won't work if more than max_stems come off a junction
    LEVELS = [FLEXIBLE, LOOP, BULGE] + [JUNCTION]*MAX_STEMS

    length = len(struct)
    result = zeros((length,), 'c')
    stack = [None,[],0]
    curr_level = stack
    if verbose:
        print 'curr_level:', curr_level
        print 'result:', result

    for i, c in enumerate(struct):
        if verbose:
            print 'pos, char:',i,c
        #open parens add new level to stack
        if c == '(':
            curr_level = [curr_level,[],1]
            result[i] = STEM
        #unpaired base gets appended to current level
        elif c == '.':
            curr_level[ITEMS].append(i)
        #closed parens subtract level from stack and assign state
        elif c == ')':   #note: will handle end separately
            result[i] = STEM
            put(result, curr_level[ITEMS], LEVELS[curr_level[DEGREE]])
            curr_level = curr_level[PARENT]
            curr_level[DEGREE] += 1
        if verbose:
            print 'curr_level:', curr_level
            print 'result', result
            
    #handle ends and flexible bases
    end_items = curr_level[ITEMS]
    if end_items:
        first_start = struct.find('(')
        if first_start == -1:
            first_start = length+1
        last_end = struct.rfind(')')
        put(result, [i for i in end_items if first_start<i<last_end], FLEXIBLE)
        put(result, [i for i in end_items if not first_start<i<last_end], END)
    return result.tostring()