# Module-level imports reconstructed for the functions below. The local
# modules (fibers, distance, scalars, similarity) and the mask() helper used
# by remove_outliers() are assumed to come from the surrounding package.
import numpy
import numpy as np  # the two pairwise-distance helpers use the np alias
import vtk
import sklearn.preprocessing

# joblib parallelism is optional; fall back to serial loops if unavailable
try:
    from joblib import Parallel, delayed
    USE_PARALLEL = 1
except ImportError:
    USE_PARALLEL = 0
    print("<filter.py> Failed to import joblib; computing distances serially.")

import fibers
import distance
import scalars
import similarity


def _pairwiseDistance_matrix(inputVTK, sigma, no_of_jobs):
    """ An internal function used to compute an NxN distance matrix for all
    fibers (N) in the input data.

    INPUT:
        inputVTK - input polydata file
        sigma - width of kernel; adjust to alter sensitivity
        no_of_jobs - processes to use to perform computation

    OUTPUT:
        distances - NxN matrix containing distances between fibers
    """

    fiberArray = fibers.FiberArray()
    fiberArray.convertFromVTK(inputVTK, pts_per_fiber=20)

    # NOTE: sigma is documented above but is not used in this computation.
    distances = Parallel(n_jobs=no_of_jobs, verbose=0)(
        delayed(distance.fiberDistance)(fiberArray.getFiber(fidx),
                                        fiberArray)
        for fidx in range(0, fiberArray.no_of_fibers))
    distances = np.array(distances)

    # Normalize between 0 and 1
    distances = sklearn.preprocessing.MinMaxScaler().fit_transform(distances)

    return distances
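
# A minimal sketch (not part of the original API): MinMaxScaler rescales each
# *column* of the matrix independently, so the normalized matrix is generally
# not symmetric even though the raw distance matrix is. The toy matrix below
# is illustrative only.
def _demo_minmax_normalization():
    d = np.array([[0.0, 2.0, 8.0],
                  [2.0, 0.0, 4.0],
                  [8.0, 4.0, 0.0]])
    per_column = sklearn.preprocessing.MinMaxScaler().fit_transform(d)
    global_norm = (d - d.min()) / (d.max() - d.min())
    print(per_column)   # columns scaled independently; symmetry is lost
    print(global_norm)  # symmetric, since one global min/max is used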
def _pairwiseQDistance_matrix(inputVTK, scalarData, scalarType, no_of_jobs):
    """ An internal function used to compute the "distance" between
    quantitative points along a fiber.
    """

    fiberArray = fibers.FiberArray()
    fiberArray.convertFromVTK(inputVTK, pts_per_fiber=20)
    no_of_fibers = fiberArray.no_of_fibers

    scalarArray = scalars.FiberArrayScalar()
    scalarArray.addScalar(inputVTK, fiberArray, scalarData, scalarType)

    qDistances = Parallel(n_jobs=no_of_jobs, verbose=0)(
        delayed(distance.scalarDistance)(
            scalarArray.getScalar(fiberArray, fidx, scalarType),
            scalarArray.getScalars(fiberArray, range(no_of_fibers),
                                   scalarType))
        for fidx in range(0, no_of_fibers))
    qDistances = np.array(qDistances)

    # Normalize if not already normalized between 0 and 1
    if np.max(qDistances) > 1.0:
        qDistances = \
            sklearn.preprocessing.MinMaxScaler().fit_transform(qDistances)

    return qDistances
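
# A hedged sketch of one common next step (not necessarily what this package
# does): converting a normalized distance matrix into a Gaussian affinity
# matrix before clustering, where sigma plays the kernel-width role the
# docstring of _pairwiseDistance_matrix assigns to it. The default sigma here
# is illustrative only.
def _demo_distances_to_affinity(distances, sigma=0.4):
    # exp(-d^2 / sigma^2): similarity falls off smoothly with distance
    return np.exp(-np.square(distances) / (sigma * sigma))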
def anisotropic_smooth(inpd, fiber_distance_threshold, points_per_fiber=30,
                       n_jobs=2, cluster_max=10):
    """ Average nearby fibers.

    The pairwise fiber distance matrix is computed, then fibers are
    averaged with their neighbors until an edge
    (> fiber_distance_threshold) is encountered.
    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers

    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))

    converged = False
    iteration_count = 0

    while not converged:
        print("<filter.py> ITERATION:", iteration_count,
              "SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count)))
        print("<filter.py> number indices", len(curr_indices))

        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()

        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = range(0, current_fiber_array.number_of_fibers)

        # if the maximum number of fibers has been combined,
        # stop averaging this fiber
        done[numpy.nonzero(numpy.array(curr_count) >= cluster_max)] = 1

        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array, 0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = similarity.fiber_distance(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf

        # sort the pairwise distances
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)

        print("<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]],
              "DISTANCE COUNT:", distances.shape)

        # if the smallest distance is greater than or equal to the
        # threshold, we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print("<filter.py> CONVERGED")
            break
        else:
            print("<filter.py> NOT CONVERGED")

        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0

        # combine nearest neighbors unless done, until the threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise
            # distance, using integer division and modulo
            f_row = pair_idx // number_of_fibers
            f_col = pair_idx % number_of_fibers

            # check if this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers (depending on how many
                # fibers each one already represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col]) / 2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                # concatenate the index lists (flat, not nested)
                next_indices.append(
                    list(curr_indices[f_row] + curr_indices[f_col]))

            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any unvisited (already converged) fibers
        unvisited = numpy.nonzero(done == 0)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])

        # set up for the next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for the next iteration's distance computation
        current_fiber_array = fibers.FiberArray()
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [current_fiber_array.number_of_fibers,
                current_fiber_array.points_per_fiber]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        curr_fidx = 0
        for curr_fib in curr_fibers:
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s
            curr_fidx += 1

        print("<filter.py> SUM FIBER COUNTS:",
              numpy.sum(numpy.array(curr_count)),
              "SUM DONE FIBERS:", numpy.sum(done))
        print("<filter.py> MAX COUNT:", numpy.max(numpy.array(curr_count)),
              "AVGS THIS ITER:", number_averages)

    # when converged, convert the output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers each output fiber represents
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input polydata by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    cluster_idx = 0
    for index_list in curr_indices:
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
        cluster_idx += 1
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
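
# A minimal usage sketch for anisotropic_smooth (assumes a legacy-format .vtk
# tractography file; the file names and parameter values below are
# placeholders, not recommendations).
def _demo_anisotropic_smooth():
    reader = vtk.vtkPolyDataReader()
    reader.SetFileName('tract.vtk')  # hypothetical input path
    reader.Update()

    outpd, counts, labeled_inpd, cluster_numbers, cluster_count = \
        anisotropic_smooth(reader.GetOutput(),
                           fiber_distance_threshold=5.0,
                           points_per_fiber=30, n_jobs=4, cluster_max=10)

    writer = vtk.vtkPolyDataWriter()
    writer.SetFileName('tract_averaged.vtk')  # hypothetical output path
    writer.SetInputData(outpd)
    writer.Write()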
def smooth(inpd, fiber_distance_sigma=25, points_per_fiber=30, n_jobs=2,
           upper_thresh=30):
    """ Average nearby fibers.

    The pairwise fiber distance matrix is computed, then fibers are
    averaged with their neighbors using Gaussian weighting. The "local
    density" or soft neighbor count is also output.
    """

    sigmasq = fiber_distance_sigma * fiber_distance_sigma

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)

    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    next_fibers = list()
    next_weights = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)

    fiber_indices = range(0, current_fiber_array.number_of_fibers)

    # compare squared distances to a squared distance threshold
    upper_thresh = upper_thresh * upper_thresh

    print("<filter.py> Computing pairwise distances...")

    # pairwise distance matrix
    if USE_PARALLEL:
        distances = Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(similarity.fiber_distance)(
                current_fiber_array.get_fiber(lidx),
                current_fiber_array, 0, 'Hausdorff')
            for lidx in fiber_indices)
        distances = numpy.array(distances)
    else:
        distances = numpy.zeros(
            (current_fiber_array.number_of_fibers,
             current_fiber_array.number_of_fibers))
        for lidx in fiber_indices:
            # use the same distance method as the parallel branch
            distances[lidx, :] = similarity.fiber_distance(
                current_fiber_array.get_fiber(lidx),
                current_fiber_array, 0, 'Hausdorff')

    # gaussian smooth all fibers using the local neighborhood
    for fidx in fiber_indices:
        if (fidx % 100) == 0:
            print(fidx, '/', current_fiber_array.number_of_fibers)

        # find indices of all nearby fibers
        indices = numpy.nonzero(distances[fidx] < upper_thresh)[0]
        local_fibers = list()
        local_weights = list()

        for idx in indices:
            dist = distances[fidx][idx]
            # these are already squared distances, so this is
            # exp(-d^2 / sigma^2)
            weight = numpy.exp(-dist / sigmasq)
            local_fibers.append(curr_fibers[idx] * weight)
            local_weights.append(weight)

        # actually perform the weighted average (the neighborhood always
        # includes the center fiber under the kernel)
        out_fiber = local_fibers[0]
        out_weights = local_weights[0]
        for fiber in local_fibers[1:]:
            out_fiber += fiber
        for weight in local_weights[1:]:
            out_weights += weight
        out_fiber = out_fiber / out_weights
        next_fibers.append(out_fiber)
        next_weights.append(out_weights)

    # set up the output array
    output_fiber_array = fibers.FiberArray()
    output_fiber_array.number_of_fibers = len(curr_fibers)
    output_fiber_array.points_per_fiber = points_per_fiber
    dims = [output_fiber_array.number_of_fibers,
            output_fiber_array.points_per_fiber]
    # fiber data
    output_fiber_array.fiber_array_r = numpy.zeros(dims)
    output_fiber_array.fiber_array_a = numpy.zeros(dims)
    output_fiber_array.fiber_array_s = numpy.zeros(dims)
    next_fidx = 0
    for next_fib in next_fibers:
        output_fiber_array.fiber_array_r[next_fidx] = next_fib.r
        output_fiber_array.fiber_array_a[next_fidx] = next_fib.a
        output_fiber_array.fiber_array_s[next_fidx] = next_fib.s
        next_fidx += 1

    # convert output to polydata
    outpd = output_fiber_array.convert_to_polydata()

    # color output by the summed weights, i.e. the "local density" or soft
    # neighbor count of each output fiber
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('KernelDensity')
    for weight in next_weights:
        outcolors.InsertNextTuple1(weight)
    outpd.GetCellData().AddArray(outcolors)
    outpd.GetCellData().SetActiveScalars('KernelDensity')

    return outpd, numpy.array(next_weights)
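
# Self-contained check (illustrative, not part of the original module) that
# the weight expression in smooth() matches exp(-d^2 / sigma^2): the distance
# matrix holds *squared* distances, so exp(-dist / sigmasq) is the
# squared-exponential kernel written in terms of the raw distance d.
def _demo_smooth_weight(d=20.0, sigma=25.0):
    squared = d * d                                  # what `distances` stores
    w_code = numpy.exp(-squared / (sigma * sigma))   # expression in smooth()
    w_math = numpy.exp(-(d ** 2) / sigma ** 2)       # exp(-d^2 / sigma^2)
    assert numpy.isclose(w_code, w_math)
    return w_code                                    # ~0.527 for d=20, sigma=25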
def remove_outliers(inpd, min_fiber_distance, n_jobs=0, distance_method='Mean'):
    """ Remove fibers that have no other nearby fibers, i.e. outliers.

    The pairwise fiber distance matrix is computed, then fibers are
    rejected if their average neighbor distance (using the closest 3
    neighbors) is higher than min_fiber_distance.
    """

    fiber_array = fibers.FiberArray()
    #fiber_array.points_per_fiber = 5
    fiber_array.points_per_fiber = 10
    fiber_array.convert_from_polydata(inpd)

    fiber_indices = range(0, fiber_array.number_of_fibers)

    # squared distances are computed
    min_fiber_distance = min_fiber_distance * min_fiber_distance

    # pairwise distance matrix
    if USE_PARALLEL and n_jobs > 0:
        distances = Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(similarity.fiber_distance)(
                fiber_array.get_fiber(lidx), fiber_array, threshold=0,
                distance_method=distance_method)
            for lidx in fiber_indices)
        distances = numpy.array(distances)

        # now we check where there are no nearby fibers
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            dist = numpy.sort(distances[lidx, :])
            # robust minimum distance: average of the 3 nearest neighbors
            # (dist[0] is the self-distance, which is 0)
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0
            #mindist[lidx] = (dist[1] + dist[2]) / 2.0
    else:
        # do this in a loop to use less memory; parallelization can then
        # happen over the number of subjects.
        mindist = numpy.zeros(fiber_array.number_of_fibers)
        for lidx in fiber_indices:
            distances = similarity.fiber_distance(
                fiber_array.get_fiber(lidx), fiber_array, 0,
                distance_method=distance_method)
            dist = numpy.sort(distances)
            # robust minimum distance
            mindist[lidx] = (dist[1] + dist[2] + dist[3]) / 3.0

    # keep only fibers that have nearby similar fibers
    fiber_mask = mindist < min_fiber_distance

    print("<filter.py> Number retained after outlier removal:",
          len(numpy.nonzero(fiber_mask)[0]), "/", len(fiber_mask))

    outpd = mask(inpd, fiber_mask, mindist)
    outpd_reject = mask(inpd, ~fiber_mask, mindist)

    return outpd, fiber_mask, outpd_reject
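
# Standalone sketch (toy numbers, not from real data) of the "robust minimum
# distance" used by remove_outliers: the average distance to the 3 nearest
# neighbors, skipping the self-distance at sorted position 0.
def _demo_robust_neighbor_distance():
    row = numpy.array([0.0, 9.0, 4.0, 25.0, 16.0])  # toy squared distances
    dist = numpy.sort(row)
    return (dist[1] + dist[2] + dist[3]) / 3.0      # (4 + 9 + 16) / 3 ~= 9.67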
def laplacian_of_gaussian(inpd, fiber_distance_sigma=25, points_per_fiber=30,
                          n_jobs=2, upper_thresh=30):
    """ Filter nearby fibers, using LoG weights.

    The pairwise fiber distance matrix is computed, then fibers are
    averaged with their neighbors using LoG weighting. This is essentially
    a fiber subtraction operation, giving vectors pointing from the center
    fiber under the kernel to all nearby fibers. Thus the output of this
    operation is not a fiber, but we compute properties of the output that
    might be interesting and related to fibers. We summarize the result
    using the average vector at each fiber point (the output is its
    magnitude, similar to edge strength). The covariance of the vectors is
    also investigated. This matrix would be spherical in an isotropic
    region such as a tract center (tube/line detector), or planar in a
    sheetlike tract (sheet detector).

    The kernel is (1 - d^2/sigma^2) * exp(-d^2 / (2 * sigma^2)), and the
    weights are normalized in the neighborhood (weighted averaging).
    """

    sigmasq = fiber_distance_sigma * fiber_distance_sigma

    # polydata to array conversion, fixed-length fiber representation
    fiber_array = fibers.FiberArray()
    fiber_array.points_per_fiber = points_per_fiber
    fiber_array.convert_from_polydata(inpd)

    fiber_indices = range(0, fiber_array.number_of_fibers)

    # pairwise distance matrix
    if USE_PARALLEL:
        distances = Parallel(n_jobs=n_jobs, verbose=1)(
            delayed(similarity.fiber_distance)(
                fiber_array.get_fiber(lidx), fiber_array, 0, 'Hausdorff')
            for lidx in fiber_indices)
        distances = numpy.array(distances)
    else:
        distances = numpy.zeros(
            (fiber_array.number_of_fibers, fiber_array.number_of_fibers))
        for lidx in fiber_indices:
            # use the same distance method as the parallel branch
            distances[lidx, :] = similarity.fiber_distance(
                fiber_array.get_fiber(lidx), fiber_array, 0, 'Hausdorff')

    # fiber list data structure initialization for easy fiber averaging
    fiber_list = list()
    for lidx in range(0, fiber_array.number_of_fibers):
        fiber_list.append(fiber_array.get_fiber(lidx))

    filter_vectors = list()
    filter_vector_magnitudes = list()
    filter_confidences = list()

    # smooth all fibers using the local neighborhood
    for fidx in fiber_indices:
        if (fidx % 100) == 0:
            print(fidx, '/', fiber_array.number_of_fibers)

        current_fiber = fiber_list[fidx]

        # find indices of all nearby fibers;
        # this includes the center fiber under the kernel
        indices = numpy.nonzero(distances[fidx] < upper_thresh)[0]
        local_fibers = list()
        local_weights = list()

        for idx in indices:
            dist = distances[fidx][idx]
            # compute filter kernel weights.
            # NOTE: a plain Gaussian weight is active here; the LoG weight
            # from the docstring is kept as the commented alternative. For
            # a true LoG response the weights must sum to 0 (zero response
            # in a constant region), which is not enforced here.
            weight = numpy.exp(-(dist * dist) / sigmasq)
            #weight = (1 - (dist * dist) / sigmasq) * numpy.exp(-(dist * dist) / (2 * sigmasq))
            local_fibers.append(fiber_list[idx])
            local_weights.append(weight)

        # accumulate weighted difference vectors from the center fiber
        out_vector = fibers.Fiber()
        out_vector.points_per_fiber = points_per_fiber
        out_vector.r = numpy.zeros(points_per_fiber)
        out_vector.a = numpy.zeros(points_per_fiber)
        out_vector.s = numpy.zeros(points_per_fiber)

        for idx, fiber in enumerate(local_fibers):
            # ensure fiber ordering by matching to the current fiber only;
            # otherwise point order is undefined after fiber subtraction
            matched_fiber = current_fiber.match_order(fiber)
            out_vector.r += (current_fiber.r - matched_fiber.r) * local_weights[idx]
            out_vector.a += (current_fiber.a - matched_fiber.a) * local_weights[idx]
            out_vector.s += (current_fiber.s - matched_fiber.s) * local_weights[idx]

        # normalize the weights in the neighborhood (weighted averaging)
        total_weights = numpy.sum(numpy.array(local_weights))
        out_vector = out_vector / total_weights
        filter_vectors.append(out_vector)
        filter_confidences.append(total_weights)
        filter_vector_magnitudes.append(numpy.sqrt(
            numpy.multiply(out_vector.r, out_vector.r) +
            numpy.multiply(out_vector.a, out_vector.a) +
            numpy.multiply(out_vector.s, out_vector.s)))

    # output a new polydata with fixed-length fibers, carrying the vector
    # field produced by the filtering
    outpd = fiber_array.convert_to_polydata()

    vectors = vtk.vtkFloatArray()
    vectors.SetName('FiberDifferenceVectors')
    vectors.SetNumberOfComponents(3)
    for vec in filter_vectors:
        for idx in range(points_per_fiber):
            vectors.InsertNextTuple3(vec.r[idx], vec.a[idx], vec.s[idx])

    magnitudes = vtk.vtkFloatArray()
    magnitudes.SetName('FiberDifferenceMagnitudes')
    magnitudes.SetNumberOfComponents(1)
    for mag in filter_vector_magnitudes:
        for idx in range(points_per_fiber):
            magnitudes.InsertNextTuple1(mag[idx])

    confidences = vtk.vtkFloatArray()
    confidences.SetName('FiberDifferenceConfidences')
    confidences.SetNumberOfComponents(1)
    for conf in filter_confidences:
        for idx in range(points_per_fiber):
            confidences.InsertNextTuple1(conf)

    outpd.GetPointData().AddArray(vectors)
    outpd.GetPointData().SetActiveVectors('FiberDifferenceVectors')

    outpd.GetPointData().AddArray(confidences)
    outpd.GetPointData().SetActiveScalars('FiberDifferenceConfidences')

    outpd.GetPointData().AddArray(magnitudes)
    outpd.GetPointData().SetActiveScalars('FiberDifferenceMagnitudes')

    return outpd, numpy.array(filter_vector_magnitudes)
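
# Illustrative comparison (not part of the original module) of the plain
# Gaussian weight that laplacian_of_gaussian() currently applies versus the
# LoG weight given in its docstring. The LoG weight crosses zero at
# d == sigma and is negative beyond it, which is why a true LoG kernel sums
# to zero over a constant region.
def _demo_log_vs_gaussian_weights(sigma=25.0):
    for d in (0.0, 10.0, 25.0, 40.0):
        dsq = d * d
        gaussian = numpy.exp(-dsq / (sigma * sigma))
        log = (1 - dsq / (sigma * sigma)) * numpy.exp(-dsq / (2 * sigma * sigma))
        print(d, gaussian, log)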