def calc_2demd(data, row_tree, col_tree, row_alpha=1.0, row_beta=0.0,
               col_alpha=1.0, col_beta=0.0, exc_sing=False, exc_raw=False):
    """
    Pairwise tree-based 2D EMD between the slices data[:, :, t] of a 3D array.

    Every slice is converted to a vector of weighted bi-tree averages
    (tree_util.bitree_averages scaled entrywise by the outer product of the
    row-node and column-node weights); the cityblock (L1) distance is then
    taken between every pair of vectors.

    A node's weight is
        (node.size / n)**beta * 2.0**((1 - node.level) * alpha)
    where n is the number of rows (row tree) or columns (column tree).

    Parameters:
        data: array whose shape unpacks to (nrows, ncols, nchannels).
        row_tree, col_tree: trees over the rows / columns. They are iterated
            node by node and must expose .size (and .tree_size when exc_raw
            is set); nodes expose .size, .level and .idx.
        row_alpha, col_alpha: exponent scale applied to the node level.
        row_beta, col_beta: power applied to the node's size fraction.
        exc_sing: when true, nodes with size == 1 get weight 0.0.
        exc_raw: when true, only the leading (tree_size - nrows) row nodes
            and (tree_size - ncols) column nodes are kept.
            NOTE(review): presumably this drops the singleton leaf folders,
            assuming they are the last nodes of each tree -- confirm.

    Returns:
        The square nchannels x nchannels matrix of pairwise distances.

    Raises:
        AssertionError: if the tree sizes do not match the data dimensions.
    """
    nrows, ncols, nchannels = np.shape(data)
    assert nrows == row_tree.size, "Tree size must match # rows in data."
    assert ncols == col_tree.size, "Tree size must match # cols in data."

    def _node_weights(tree, count, alpha, beta):
        # One weight per node, in tree iteration order.
        weights = np.array([((node.size*1.0/count)**beta) *
                            (2.0**((1.0-node.level)*alpha))
                            for node in tree])
        if exc_sing:
            # Singletons are silenced through their own index.
            for node in tree:
                if node.size == 1:
                    weights[node.idx] = 0.0
        return weights

    folder_frac = np.outer(_node_weights(row_tree, nrows, row_alpha, row_beta),
                           _node_weights(col_tree, ncols, col_alpha, col_beta))

    # The first channel is handled up front because it fixes the length of
    # the per-channel feature vectors.
    weighted = folder_frac * tree_util.bitree_averages(data[:, :, 0],
                                                       row_tree, col_tree)
    if exc_raw:
        col_cut = col_tree.tree_size - ncols
        row_cut = row_tree.tree_size - nrows
        weighted = weighted[:row_cut, :col_cut]

    features = np.zeros((nchannels, np.size(weighted)))
    features[0, :] = np.reshape(weighted, (1, -1))

    for chan in range(1, nchannels):
        weighted = folder_frac * tree_util.bitree_averages(data[:, :, chan],
                                                           row_tree, col_tree)
        if exc_raw:
            weighted = weighted[:row_cut, :col_cut]
        features[chan, :] = np.reshape(weighted, (1, -1))

    condensed = spsp.distance.pdist(features, "cityblock")
    return spsp.distance.squareform(condensed)
def bitree_product_transform(data, row_tree, col_tree):
    """
    Multiplicative bi-tree transform of the folder averages of data.

    With A = tree_util.bitree_averages(data, row_tree, col_tree), the
    returned array C (same shape as A) holds:
        C[0, 0] = A[0, 0]
        C[0, c] = A[0, c] / A[0, parent(c)]
        C[r, 0] = A[r, 0] / A[parent(r), 0]
        C[r, c] = (A[parent(r), parent(c)] * A[r, c]) /
                  (A[parent(r), c] * A[r, parent(c)])
    for every non-root row node r and column node c. Entries that come out
    as NaN are replaced by 1.0.

    Requires node 0 to be the root of each tree: index 0 is treated as the
    root and tree[1:] as the nodes that have a parent.
    """
    averages = tree_util.bitree_averages(data, row_tree, col_tree)
    transform = np.zeros(np.shape(averages))

    # The root/root entry carries the raw average.
    transform[0, 0] = averages[0, 0]

    col_children = list(col_tree[1:])
    row_children = list(row_tree[1:])

    # Root row: each column node relative to its column parent.
    for col in col_children:
        transform[0, col.idx] = averages[0, col.idx]/averages[0, col.parent.idx]

    # Root column: each row node relative to its row parent.
    for row in row_children:
        transform[row.idx, 0] = averages[row.idx, 0]/averages[row.parent.idx, 0]

    # Interior: cross ratio of the node against its two parents.
    for row in row_children:
        r, rp = row.idx, row.parent.idx
        for col in col_children:
            c, cp = col.idx, col.parent.idx
            numerator = averages[rp, cp]*averages[r, c]
            denominator = averages[rp, c]*averages[r, cp]
            transform[r, c] = numerator/denominator

    # Undefined ratios (NaN) are treated as "no change", i.e. 1.0.
    transform[np.isnan(transform)] = 1.0
    return transform
def bitree_null_coeffs(data, row_tree, col_tree):
    """
    Compute the "null" coefficient for every (row node, column node) pair.

    Let A = tree_util.bitree_averages(data, row_tree, col_tree). The result
    has shape (row_tree.tree_size, col_tree.tree_size) and contains:
        * [0, 0]: A[0, 0], the average over the entire matrix;
        * [i, 0] and [0, j] (the border): 0.0;
        * [i, j] with i, j >= 1:
              union_avg - (avg1 + avg2 - total_avg)
          where avg1 is the average on (row node, column parent), avg2 the
          average on (row parent, column node), total_avg the average on
          (row parent, column parent), and union_avg the size-weighted
          average over the union of the first two regions (their overlap,
          the node/node block, is subtracted once).

    Parameters:
        data: matrix passed unchanged to tree_util.bitree_averages.
        row_tree, col_tree: trees supporting integer indexing and exposing
            .tree_size; nodes expose .idx, .size and .parent.
            NOTE(review): results are written at the positions i, j used to
            index the trees while averages are read at node.idx, so this
            presumably relies on tree[i].idx == i -- confirm.

    Returns:
        The float array of null coefficients described above.
    """
    n_row_nodes = row_tree.tree_size
    n_col_nodes = col_tree.tree_size

    # Builtin float is used instead of the np.float alias, which was removed
    # from NumPy in 1.24; the resulting dtype (float64) is the same.
    null_coeffs = np.zeros([n_row_nodes, n_col_nodes], float)
    data_avgs = tree_util.bitree_averages(data, row_tree, col_tree)

    # Root/root covers the entire matrix, so the null coeff is the average.
    if n_row_nodes and n_col_nodes:
        null_coeffs[0, 0] = data_avgs[0, 0]

    # The remaining border entries (i == 0 or j == 0) stay at the 0.0 they
    # were initialised with, so only nodes with two parents are visited.
    # range (not xrange) keeps this runnable on both Python 2 and Python 3.
    for i in range(1, n_row_nodes):
        row_node = row_tree[i]
        row_parent = row_node.parent
        for j in range(1, n_col_nodes):
            col_node = col_tree[j]
            col_parent = col_node.parent

            # W = B_A + B_WX + B_WY + B_W
            # we want W = avg on the union of the parents.
            total_avg = data_avgs[row_parent.idx, col_parent.idx]
            parent_avg1 = data_avgs[row_node.idx, col_parent.idx]
            parent_avg2 = data_avgs[row_parent.idx, col_node.idx]
            sub_avg = data_avgs[row_node.idx, col_node.idx]

            parent_size1 = row_node.size*col_parent.size
            parent_size2 = row_parent.size*col_node.size
            sub_size = row_node.size*col_node.size

            # Inclusion-exclusion: the node/node block lies in both parent
            # regions and must be counted only once.
            union_sum = (parent_avg1*parent_size1 + parent_avg2*parent_size2
                         - sub_avg*sub_size)
            union_denom = parent_size1 + parent_size2 - sub_size
            union_avg = union_sum/(1.0*union_denom)

            null_coeffs[i, j] = union_avg - (parent_avg1 + parent_avg2
                                             - total_avg)
    return null_coeffs
def calc_2demd_ref(ref_data, data, row_tree, col_tree, row_alpha=1.0,
                   row_beta=0.0, col_alpha=1.0, col_beta=0.0,
                   exc_sing=False, exc_raw=False):
    """
    Tree-based 2D EMD from a set of samples to a set of reference samples.

    Every sample matrix is converted to its weighted bi-tree averages
    (tree_util.bitree_averages scaled entrywise by the outer product of the
    row-node and column-node weights) and compared with the cityblock (L1)
    distance. A node's weight is
        (node.size / n)**beta * 2.0**((1 - node.level) * alpha)
    where n is the number of rows (row tree) or columns (column tree).

    Parameters:
        ref_data: reference samples. When data is 2D this is a single
            rows x cols matrix; otherwise each slice ref_data[:, :, t] is
            one reference sample.
        data: samples to compare against the reference, laid out the same
            way (data.ndim selects the 2D or 3D code path for both inputs).
        row_tree, col_tree: trees over the rows / columns. They are iterated
            node by node and expose .size and .tree_size; nodes expose
            .size, .level and .idx.
        row_alpha, col_alpha: exponent scale applied to the node level.
        row_beta, col_beta: power applied to the node's size fraction.
        exc_sing: when true, nodes with size == 1 get weight 0.0.
        exc_raw: when true, only the leading (tree_size - rows) row nodes
            and (tree_size - cols) column nodes are kept.
            NOTE(review): presumably this drops the singleton leaf folders,
            assuming they are the last nodes of each tree -- confirm.

    Returns:
        2D input: the scalar cityblock distance between data and ref_data.
        3D input: a (number of data slices) x (number of ref_data slices)
        matrix of cityblock distances.

    Raises:
        AssertionError: if the tree sizes do not match the data dimensions
            or the reference and sample dimensions differ.
    """
    if data.ndim == 2:
        ref_rows, ref_cols = np.shape(ref_data)
        rows, cols = np.shape(data)
    else:
        ref_rows, ref_cols, ref_chans = np.shape(ref_data)
        rows, cols, chans = np.shape(data)

    col_singletons_start = col_tree.tree_size - cols
    row_singletons_start = row_tree.tree_size - rows

    assert rows == row_tree.size, "Tree size must match # rows in data."
    assert ref_rows == rows, "Mismatched row #: reference and sample sets."
    assert cols == col_tree.size, "Tree size must match # cols in data."
    assert ref_cols == cols, "Mismatched col #: reference and sample sets."

    row_folder_fraction = np.array([((node.size*1.0/rows)**row_beta) *
                                    (2.0**((1.0-node.level)*row_alpha))
                                    for node in row_tree])
    col_folder_fraction = np.array([((node.size*1.0/cols)**col_beta) *
                                    (2.0**((1.0-node.level)*col_alpha))
                                    for node in col_tree])
    if exc_sing:
        for node in row_tree:
            if node.size == 1:
                row_folder_fraction[node.idx] = 0.0
        for node in col_tree:
            if node.size == 1:
                col_folder_fraction[node.idx] = 0.0
    folder_frac = np.outer(row_folder_fraction, col_folder_fraction)
    if exc_raw:
        folder_frac = folder_frac[:row_singletons_start,
                                  :col_singletons_start]

    def _weighted_averages(matrix):
        # Weighted (and, with exc_raw, trimmed) bi-tree averages of one
        # rows x cols matrix.
        avgs = tree_util.bitree_averages(matrix, row_tree, col_tree)
        if exc_raw:
            avgs = avgs[:row_singletons_start, :col_singletons_start]
        return folder_frac * avgs

    if data.ndim == 2:
        # The exc_raw trimming previously referenced an unassigned local
        # (avgs) here and raised UnboundLocalError; both coefficient sets
        # are now trimmed the same way as in the 3D path.
        coefs = _weighted_averages(data)
        ref_coefs = _weighted_averages(ref_data)
        return spsp.distance.cityblock(coefs.flatten(), ref_coefs.flatten())

    sums3d = np.zeros((chans, np.size(folder_frac)))
    for t in range(chans):
        sums3d[t, :] = np.reshape(_weighted_averages(data[:, :, t]), (1, -1))

    ref_sums3d = np.zeros((ref_chans, np.size(folder_frac)))
    for t in range(ref_chans):
        ref_sums3d[t, :] = np.reshape(_weighted_averages(ref_data[:, :, t]),
                                      (1, -1))

    return spsp.distance.cdist(sums3d, ref_sums3d, "cityblock")