예제 #1
0
def calc_2demd(data, row_tree, col_tree, row_alpha=1.0, row_beta=0.0,
               col_alpha=1.0, col_beta=0.0, exc_sing=False, exc_raw=False):
    """
    Pairwise 2D EMD between the channels of a 3-D data array.

    Every slice data[:, :, t] is averaged over the folders of the row and
    column trees (via tree_util.bitree_averages), the averages are weighted,
    and the cityblock distance between the weighted vectors of each pair of
    channels is returned.

    Weighting of a tree node, applied independently on rows and columns:
        (node.size / number_of_leaves) ** beta * 2 ** ((1 - node.level) * alpha)
    The weight of a (row folder, column folder) pair is the product of the
    two node weights.

    data      -- array of shape (nrows, ncols, nchannels)
    row_tree  -- tree over the rows; row_tree.size must equal nrows
    col_tree  -- tree over the columns; col_tree.size must equal ncols
    exc_sing  -- if True, folders of size 1 get weight zero
    exc_raw   -- if True, the last nrows row nodes and last ncols column
                 nodes of the average matrix are dropped
                 (presumably the singleton leaves -- confirm against the
                 node ordering used by the tree class)

    Returns the square (nchannels x nchannels) distance matrix.
    """
    n_rows, n_cols, n_channels = np.shape(data)
    assert n_rows == row_tree.size, "Tree size must match # rows in data."
    assert n_cols == col_tree.size, "Tree size must match # cols in data."

    def node_weights(tree, leaf_count, alpha, beta):
        # One weight per node, in the tree's iteration order.
        weights = np.array([
            ((node.size * 1.0 / leaf_count) ** beta)
            * (2.0 ** ((1.0 - node.level) * alpha))
            for node in tree
        ])
        if exc_sing:
            for node in tree:
                if node.size == 1:
                    weights[node.idx] = 0.0
        return weights

    weight_grid = np.outer(
        node_weights(row_tree, n_rows, row_alpha, row_beta),
        node_weights(col_tree, n_cols, col_alpha, col_beta),
    )

    def weighted_channel(t):
        # Weighted bi-tree averages of channel t, optionally truncated.
        channel_avgs = tree_util.bitree_averages(data[:, :, t],
                                                 row_tree, col_tree)
        channel_avgs = weight_grid * channel_avgs
        if exc_raw:
            row_cut = row_tree.tree_size - n_rows
            col_cut = col_tree.tree_size - n_cols
            channel_avgs = channel_avgs[:row_cut, :col_cut]
        return channel_avgs

    # Channel 0 is evaluated first because its size fixes the feature length.
    first = weighted_channel(0)
    features = np.zeros((n_channels, np.size(first)))
    features[0, :] = np.reshape(first, (1, -1))
    for t in range(1, n_channels):
        features[t, :] = np.reshape(weighted_channel(t), (1, -1))

    condensed = spsp.distance.pdist(features, "cityblock")
    return spsp.distance.squareform(condensed)
예제 #2
0
def bitree_product_transform(data, row_tree, col_tree):
    """
    Multiplicative (ratio) coefficients of the bi-tree averages of data.

    With A = tree_util.bitree_averages(data, row_tree, col_tree):
      * entry [0, 0] is A[0, 0] itself;
      * entries in row 0 / column 0 are the node's average divided by the
        average of its parent along that tree;
      * every other entry [r, c] is
            A[parent(r), parent(c)] * A[r, c]
            ---------------------------------
            A[parent(r), c] * A[r, parent(c)]
    NaN results are replaced by 1.0; no other value is altered.

    Requires that node 0 is the root of each tree.
    Returns an array with the same shape as A.
    """
    averages = tree_util.bitree_averages(data, row_tree, col_tree)
    ratios = np.zeros(np.shape(averages))

    # (own index, parent index) for every non-root node of each tree.
    col_links = [(node.idx, node.parent.idx) for node in col_tree[1:]]
    row_links = [(node.idx, node.parent.idx) for node in row_tree[1:]]

    ratios[0, 0] = averages[0, 0]
    for c, c_up in col_links:
        ratios[0, c] = averages[0, c] / averages[0, c_up]
    for r, r_up in row_links:
        ratios[r, 0] = averages[r, 0] / averages[r_up, 0]

    for r, r_up in row_links:
        for c, c_up in col_links:
            numerator = averages[r_up, c_up] * averages[r, c]
            denominator = averages[r_up, c] * averages[r, c_up]
            ratios[r, c] = numerator / denominator

    nan_mask = np.isnan(ratios)
    ratios[nan_mask] = 1.0
    return ratios
예제 #3
0
def bitree_null_coeffs(data, row_tree, col_tree):
    """
    Compute the "null" coefficient for every (row node, column node) pair.

    The result has shape (row_tree.tree_size, col_tree.tree_size):
      * [0, 0] holds the average over the entire matrix;
      * every other entry in row 0 or column 0 is zero;
      * an interior entry [i, j] is
            union_avg - (parent_avg1 + parent_avg2 - total_avg)
        where union_avg is the size-weighted average over the union of
        (row node x column parent) and (row parent x column node), and the
        averages come from tree_util.bitree_averages.

    Assumes node 0 is the root of each tree and that row_tree[i].idx == i
    (likewise for columns) -- TODO confirm against the tree class.
    """
    # Builtin float instead of the np.float alias (removed in NumPy 1.24).
    null_coeffs = np.zeros([row_tree.tree_size, col_tree.tree_size], float)

    data_avgs = tree_util.bitree_averages(data, row_tree, col_tree)
    # range instead of xrange so the function also runs on Python 3.
    for i in range(row_tree.tree_size):
        for j in range(col_tree.tree_size):
            if i == 0 and j == 0:
                # it's the entire matrix, so the null coeff is the average.
                null_coeffs[0, 0] = data_avgs[0, 0]
                continue
            if i == 0 or j == 0:
                # on the outside of the matrix the null coefficients are
                # just zero, which the array already holds.
                continue

            # it's a node with two parents.
            # now the null coefficient is more complicated.
            row_node = row_tree[i]
            col_node = col_tree[j]
            row_parent = row_node.parent
            col_parent = col_node.parent

            # W = B_A + B_WX + B_WY + B_W
            # we want W = avg on the union of the parents.
            total_avg = data_avgs[row_parent.idx, col_parent.idx]
            parent_avg1 = data_avgs[row_node.idx, col_parent.idx]
            parent_avg2 = data_avgs[row_parent.idx, col_node.idx]
            sub_avg = data_avgs[row_node.idx, col_node.idx]
            parent_size1 = row_node.size * col_parent.size
            parent_size2 = row_parent.size * col_node.size
            sub_size = row_node.size * col_node.size

            # Inclusion-exclusion: the two parent blocks overlap exactly
            # on the (row node x column node) sub-block.
            union_sum = (parent_avg1 * parent_size1
                         + parent_avg2 * parent_size2
                         - sub_avg * sub_size)
            union_denom = parent_size1 + parent_size2 - sub_size
            union_avg = union_sum / (1.0 * union_denom)

            null_coeffs[i, j] = union_avg - (parent_avg1 + parent_avg2
                                             - total_avg)
    return null_coeffs
예제 #4
0
def calc_2demd_ref(ref_data, data, row_tree, col_tree, row_alpha=1.0,
                   row_beta=0.0, col_alpha=1.0, col_beta=0.0,
                   exc_sing=False, exc_raw=False):
    """
    Calculates the 2D EMD from a set of points to a reference set of points.

    Each point is a (rows x cols) matrix. Its bi-tree averages (from
    tree_util.bitree_averages) are weighted per folder and compared with the
    cityblock distance. A node's weight, independently on rows and columns:
        (node.size / number_of_leaves) ** beta * 2 ** ((1 - node.level) * alpha)

    data      -- 2-D array (a single point) or 3-D array whose slices
                 data[:, :, t] are the points outside the reference set
    ref_data  -- array with the same number of dimensions and the same
                 rows/cols as data, holding the reference point(s)
    exc_sing  -- if True, folders of size 1 get weight zero
    exc_raw   -- if True, the last `rows` row nodes and last `cols` column
                 nodes are dropped before comparing
                 (presumably the singleton leaves -- confirm against the
                 node ordering used by the tree class)

    Returns a single distance when data is 2-D, otherwise an array of shape
    (n_points, n_reference_points).
    """
    if data.ndim == 2:
        ref_rows, ref_cols = np.shape(ref_data)
        rows, cols = np.shape(data)
    else:
        ref_rows, ref_cols, ref_chans = np.shape(ref_data)
        rows, cols, chans = np.shape(data)

    col_singletons_start = col_tree.tree_size - cols
    row_singletons_start = row_tree.tree_size - rows

    assert rows == row_tree.size, "Tree size must match # rows in data."
    assert ref_rows == rows, "Mismatched row #: reference and sample sets."
    assert cols == col_tree.size, "Tree size must match # cols in data."
    assert ref_cols == cols, "Mismatched col #: reference and sample sets."

    row_folder_fraction = np.array([((node.size * 1.0 / rows) ** row_beta) *
                                    (2.0 ** ((1.0 - node.level) * row_alpha))
                                    for node in row_tree])
    col_folder_fraction = np.array([((node.size * 1.0 / cols) ** col_beta) *
                                    (2.0 ** ((1.0 - node.level) * col_alpha))
                                    for node in col_tree])
    if exc_sing:
        for node in row_tree:
            if node.size == 1:
                row_folder_fraction[node.idx] = 0.0
        for node in col_tree:
            if node.size == 1:
                col_folder_fraction[node.idx] = 0.0
    folder_frac = np.outer(row_folder_fraction, col_folder_fraction)
    if exc_raw:
        folder_frac = folder_frac[:row_singletons_start,
                                  :col_singletons_start]

    def weighted_averages(matrix):
        # Weighted bi-tree averages of one 2-D point, truncated the same
        # way as folder_frac when exc_raw is set.
        avgs = tree_util.bitree_averages(matrix, row_tree, col_tree)
        if exc_raw:
            avgs = avgs[:row_singletons_start, :col_singletons_start]
        return folder_frac * avgs

    if data.ndim == 2:
        # The previous code sliced an undefined `avgs` here, so exc_raw=True
        # raised UnboundLocalError and never truncated the coefficients.
        coefs = weighted_averages(data)
        ref_coefs = weighted_averages(ref_data)
        return spsp.distance.cityblock(coefs.flatten(), ref_coefs.flatten())

    def feature_rows(volume, n_channels):
        # One flattened weighted-average vector per slice volume[:, :, t].
        features = np.zeros((n_channels, np.size(folder_frac)))
        for t in range(n_channels):
            features[t, :] = np.reshape(weighted_averages(volume[:, :, t]),
                                        (1, -1))
        return features

    sums3d = feature_rows(data, chans)
    ref_sums3d = feature_rows(ref_data, ref_chans)
    return spsp.distance.cdist(sums3d, ref_sums3d, "cityblock")