# Module-level dependencies assumed from the surrounding file: numpy, vtk,
# joblib (optional, for parallel distance computation), and the package's own
# fibers and similarity modules. Adjust these imports to your package layout.
import numpy
import vtk

try:
    from joblib import Parallel, delayed
    USE_PARALLEL = True
except ImportError:
    USE_PARALLEL = False

from . import fibers, similarity


def anisotropic_smooth(inpd, fiber_distance_threshold, points_per_fiber=30, n_jobs=2, cluster_max=10):
    """Average nearby fibers.

    The pairwise fiber distance matrix is computed, then fibers are averaged
    with their neighbors until an edge (>= fiber_distance_threshold) is
    encountered.
    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers

    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))

    converged = False
    iteration_count = 0

    while not converged:
        print("<filter.py> ITERATION:", iteration_count,
              "SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count)))
        print("<filter.py> number indices", len(curr_indices))

        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()

        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = list(range(current_fiber_array.number_of_fibers))

        # if the maximum number of fibers has already been combined, stop
        # averaging this fiber (it is still carried forward unchanged below)
        at_max = numpy.array(curr_count) >= cluster_max
        done[numpy.nonzero(at_max)] = 1

        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array,
                    0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = similarity.fiber_distance(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf

        # sort the pairwise distances
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)

        print("<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]],
              "DISTANCE COUNT:", distances.shape)

        # if the smallest distance is greater than or equal to the threshold,
        # we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print("<filter.py> CONVERGED")
            break
        else:
            print("<filter.py> NOT CONVERGED")

        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0

        # combine nearest neighbors unless done, until the threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise distance
            # using integer division and modulo
            f_row = pair_idx // number_of_fibers
            f_col = pair_idx % number_of_fibers

            # check whether this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers
                # (weights depend on how many fibers each one represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col]) / 2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                #next_indices.append(list([curr_indices[f_row], curr_indices[f_col]]))
                next_indices.append(list(curr_indices[f_row] + curr_indices[f_col]))

            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any fiber that was not averaged this iteration:
        # untouched fibers and fibers already at cluster_max (the latter are
        # included explicitly so they are not dropped from the output)
        unvisited = numpy.nonzero((done == 0) | at_max)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])

        # set up for the next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for the next iteration's distance computation
        current_fiber_array = fibers.FiberArray()
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [current_fiber_array.number_of_fibers,
                current_fiber_array.points_per_fiber]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        curr_fidx = 0
        for curr_fib in curr_fibers:
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s
            curr_fidx += 1

        print("<filter.py> SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count)),
              "SUM DONE FIBERS:", numpy.sum(done))
        print("<filter.py> MAX COUNT:", numpy.max(numpy.array(curr_count)),
              "AVGS THIS ITER:", number_averages)

    # when converged, convert the output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers that each output fiber represents
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input polydata by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    cluster_idx = 0
    for index_list in curr_indices:
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
        cluster_idx += 1
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
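# Hedged usage sketch: one way anisotropic_smooth() might be driven end to
# end. The file names, the 5.0 distance threshold, and the use of the VTK
# legacy polydata reader/writer are assumptions for illustration; substitute
# whatever I/O and threshold your own pipeline uses.
if __name__ == '__main__':
    reader = vtk.vtkPolyDataReader()
    reader.SetFileName('tracts.vtk')  # hypothetical input tractography file
    reader.Update()
    input_pd = reader.GetOutput()

    # average neighboring fibers closer than 5.0 (same units as the fiber
    # coordinates), merging at most cluster_max=10 fibers per output fiber
    outpd, fiber_counts, labeled_pd, cluster_numbers, cluster_count = \
        anisotropic_smooth(input_pd, 5.0,
                           points_per_fiber=30, n_jobs=2, cluster_max=10)

    writer = vtk.vtkPolyDataWriter()
    writer.SetFileName('tracts_smoothed.vtk')  # hypothetical output file
    writer.SetInputData(outpd)
    writer.Write()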
TOTAL_PREDS = y_test_event_label_array.shape[0]
y_test_event_label_array = y_test_event_label_array.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))
y_regression_pred1_event_labels = y_regression_pred1_event_labels.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))
# y_regression_pred1_event_labels = np.asarray(y_regression_pred1_event_labels)

print(y_regression_pred1_event_labels)
print("y_regression_pred1_event_labels shape")
print(y_regression_pred1_event_labels.shape)

# let's save the predictions
pred_data = {
    "prediction": list(y_regression_pred1_event_labels.flatten()),
    "actual": list(y_test_event_label_array.flatten())
}

# make pred df
pred_df = pd.DataFrame(data=pred_data)
print(pred_df)
# sys.exit(0)

# save data
output_fp = main_output_dir + "predictions-vs-ground-truth.csv"
pred_df.to_csv(output_fp, index=False)
print(output_fp)

for TAG in TAGS:
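# Hedged follow-up sketch, independent of the truncated TAG loop above: how
# the predictions-vs-ground-truth.csv written above could be reloaded and
# scored. The metric choices and the eval_df name are illustrative additions;
# only the "prediction" and "actual" columns come from the file saved above.
import numpy as np
import pandas as pd

eval_df = pd.read_csv(output_fp)  # same path as written above
abs_err = np.abs(eval_df["prediction"] - eval_df["actual"])
print("MAE :", abs_err.mean())
print("RMSE:", np.sqrt(np.mean((eval_df["prediction"] - eval_df["actual"]) ** 2)))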
    # (this block continues a loop started before this excerpt; note the break below)
    rnn = Parallel(n_jobs=num_cores)(
        delayed(spir.build_cov)(data, [event], lag, fs) for event in events)
    rnns += rnn

    # Limit to 2000 events
    if total_events > 2000:
        break
# -

# ## Compress Rnns

# +
tmp = list()
for rnn in rnns:
    tmp.append(rnn.flatten() / np.sum(np.diag(rnn)))  # Normalize

# keep enough principal components to explain 99% of the variance
pca = PCA(0.99)
compressed = pca.fit_transform(tmp)
print('Number of compressed components: {}'.format(compressed.shape[1]))
# -

# ## Perform K-means clustering

# +
## Find n-clusters
def calculate_WSS(points, kmax):
    sse = []
    for k in range(1, kmax + 1):
        kmeans = KMeans(n_clusters=k).fit(points)
        centroids = kmeans.cluster_centers_