Example #1
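# NOTE: this function is excerpted from a larger module (a filter.py) and
# relies on module-level names. A plausible set of imports (the exact package
# paths are an assumption, not shown on this page):
#   import numpy
#   import vtk
#   from joblib import Parallel, delayed   # USE_PARALLEL flags their presence
#   from whitematteranalysis import fibers, similarity
# This variant uses Python 2 print statements and integer division.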
def anisotropic_smooth(inpd, fiber_distance_threshold, points_per_fiber=30, n_jobs=2, cluster_max=10):
    """Average nearby fibers.

    The pairwise fiber distance matrix is computed, then each fiber is
    averaged with its neighbors until an edge (> fiber_distance_threshold)
    is encountered or until cluster_max fibers have been merged.
    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers
    
    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))
        
    converged = False
    iteration_count = 0
    
    while not converged:
        print "<filter.py> ITERATION:", iteration_count, "SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count))
        print "<filter.py> number indices", len(curr_indices)
        
        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()
        
        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = range(0, current_fiber_array.number_of_fibers)

        # if the maximum number of fibers has been combined, stop averaging this fiber
        done[numpy.nonzero(numpy.array(curr_count) >= cluster_max)] = 1
        
        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                current_fiber_array.get_fiber(lidx),
                current_fiber_array,
                0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = \
                    similarity.fiber_distance(
                        current_fiber_array.get_fiber(lidx),
                        current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf
        
        # sort the pairwise distances. 
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)
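        # pair_order[i] is the flat index (into the flattened distance
        # matrix) of the i-th smallest pairwise distance; the row/column
        # fiber indices are recovered from it below with div and mod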

        print "<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]], \
            "DISTANCE COUNT:", distances.shape

        # if the smallest distance is greater or equal to the
        # threshold, we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print "<filter.py> CONVERGED"
            break
        else:
            print "<filter.py> NOT CONVERGED"
            
        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0
        
        # combine nearest-neighbor pairs (unless already done) until the distance threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise distance
            # use div and mod
            f_row = pair_idx // number_of_fibers  # floor division
            f_col = pair_idx % number_of_fibers

            # check if this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers (weighted by the number of
                # original fibers each one already represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col])/2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                #next_indices.append(list([curr_indices[f_row], curr_indices[f_col]]))
                next_indices.append(list(curr_indices[f_row] + curr_indices[f_col]))
                
            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any unvisited (already converged) fibers
        unvisited = numpy.nonzero(done == 0)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])
            
        # set up for next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for next iteration distance computation
        current_fiber_array = fibers.FiberArray()    
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [current_fiber_array.number_of_fibers, current_fiber_array.points_per_fiber]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        for curr_fidx, curr_fib in enumerate(curr_fibers):
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s

        print "<filter.py> SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count)), "SUM DONE FIBERS:", numpy.sum(done)
        print "<filter.py> MAX COUNT:" , numpy.max(numpy.array(curr_count)), "AVGS THIS ITER:", number_averages

    # when converged, convert output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers that each output fiber corresponds to
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input pd by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    for cluster_idx, index_list in enumerate(curr_indices):
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
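
Below is a minimal, hypothetical usage sketch for the function above. The file
name, the threshold value, and the choice of vtkXMLPolyDataReader are
assumptions for illustration, not part of the original snippet.

import vtk

# read a tractography polydata file (hypothetical name)
reader = vtk.vtkXMLPolyDataReader()
reader.SetFileName('tracts.vtp')
reader.Update()

# merge fibers whose pairwise Hausdorff distance is under 5.0 (an arbitrary
# value in the units of the input geometry)
outpd, counts, inpd, cluster_numbers, cluster_count = anisotropic_smooth(
    reader.GetOutput(), fiber_distance_threshold=5.0)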
Example #2
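# NOTE: as in Example #1, this snippet assumes module-level imports along the
# lines of numpy, vtk, joblib's Parallel/delayed (with USE_PARALLEL set), and
# the fibers/similarity modules; exact paths are an assumption. This variant
# uses Python 3 print functions.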
def anisotropic_smooth(inpd,
                       fiber_distance_threshold,
                       points_per_fiber=30,
                       n_jobs=2,
                       cluster_max=10):
    """ Average nearby fibers.
    
    The pairwise fiber distance matrix is computed, then fibers
    are averaged with their neighbors until an edge (>max_fiber_distance) is encountered.

    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers

    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))

    converged = False
    iteration_count = 0

    while not converged:
        print("<filter.py> ITERATION:", iteration_count, "SUM FIBER COUNTS:",
              numpy.sum(numpy.array(curr_count)))
        print("<filter.py> number indices", len(curr_indices))

        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()

        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = list(range(0, current_fiber_array.number_of_fibers))

        # if the maximum number of fibers has been combined, stop averaging this fiber
        done[numpy.nonzero(numpy.array(curr_count) >= cluster_max)] = 1

        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array, 0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = \
                    similarity.fiber_distance(
                        current_fiber_array.get_fiber(lidx),
                        current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf

        # sort the pairwise distances.
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)

        print("<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]], \
            "DISTANCE COUNT:", distances.shape)

        # if the smallest distance is greater or equal to the
        # threshold, we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print("<filter.py> CONVERGED")
            break
        else:
            print("<filter.py> NOT CONVERGED")

        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0

        # combine nearest-neighbor pairs (unless already done) until the distance threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise distance
            # use div and mod
            f_row = pair_idx // number_of_fibers  # floor division: '/' would yield a float index in Python 3
            f_col = pair_idx % number_of_fibers

            # check if this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers (weighted by the number of
                # original fibers each one already represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col])/2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                #next_indices.append(list([curr_indices[f_row], curr_indices[f_col]]))
                next_indices.append(
                    list(curr_indices[f_row] + curr_indices[f_col]))

            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any unvisited (already converged) fibers
        unvisited = numpy.nonzero(done == 0)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])

        # set up for next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for next iteration distance computation
        current_fiber_array = fibers.FiberArray()
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [
            current_fiber_array.number_of_fibers,
            current_fiber_array.points_per_fiber
        ]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        for curr_fidx, curr_fib in enumerate(curr_fibers):
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s

        print("<filter.py> SUM FIBER COUNTS:",
              numpy.sum(numpy.array(curr_count)), "SUM DONE FIBERS:",
              numpy.sum(done))
        print("<filter.py> MAX COUNT:", numpy.max(numpy.array(curr_count)),
              "AVGS THIS ITER:", number_averages)

    # when converged, convert output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers that each output fiber corresponds to
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input pd by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    for cluster_idx, index_list in enumerate(curr_indices):
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
Example #3
# NOTE: this snippet is truncated in the source: it begins mid-function and is
# dedented here for readability. It assumes earlier definitions of
# y_test_event_label_array, y_regression_pred1_event_labels, OUTPUT_EVENT_NUM,
# main_output_dir, and TAGS, plus 'import pandas as pd'.
TOTAL_PREDS = y_test_event_label_array.shape[0]

# reshape the flat label arrays to one row per sample, one column per event
y_test_event_label_array = y_test_event_label_array.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))
y_regression_pred1_event_labels = y_regression_pred1_event_labels.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))

# y_regression_pred1_event_labels = np.asarray(y_regression_pred1_event_labels)
print(y_regression_pred1_event_labels)
print("y_regression_pred1_event_labels shape")
print(y_regression_pred1_event_labels.shape)

# save the predictions alongside the ground truth
pred_data = {
    "prediction": list(y_regression_pred1_event_labels.flatten()),
    "actual": list(y_test_event_label_array.flatten()),
}

# build the predictions DataFrame
pred_df = pd.DataFrame(data=pred_data)
print(pred_df)

# write the data to CSV
output_fp = main_output_dir + "predictions-vs-ground-truth.csv"
pred_df.to_csv(output_fp, index=False)
print(output_fp)

for TAG in TAGS:
    ...  # loop body truncated in the source snippet
Example #4
# NOTE: this snippet is also truncated: it begins inside an (unshown) loop over
# recordings. It assumes earlier definitions of data, events, lag, fs,
# num_cores, total_events, and rnns, plus the spir module and joblib's
# Parallel/delayed.
        # build one covariance structure per event, in parallel (spir.build_cov)
        rnn = Parallel(n_jobs=num_cores)(
            delayed(spir.build_cov)(data, [event], lag, fs)
            for event in events)
        rnns += rnn

    # Limit to 2000 events
    if total_events > 2000:
        break

# -
# ## Compress Rnns
# +
tmp = list()
for rnn in rnns:
    tmp.append(rnn.flatten() / np.sum(np.diag(rnn)))  # normalize each matrix by its trace
pca = PCA(0.99)  # keep enough components to explain 99% of the variance
compressed = pca.fit_transform(tmp)  # fits and transforms in one step
print('Number of compressed components: {}'.format(compressed.shape[1]))


# -
# ## Perform K-means clustering
# +
## Find n-clusters
def calculate_WSS(points, kmax):
    # the source snippet is cut off below; this completion is a sketch of the
    # standard elbow-method loop
    sse = []
    for k in range(1, kmax + 1):
        kmeans = KMeans(n_clusters=k).fit(points)
        centroids = kmeans.cluster_centers_
        # total squared distance of each point to its assigned centroid
        sse.append(np.sum((points - centroids[kmeans.predict(points)]) ** 2))
    return sse
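
# A short, hypothetical usage of the completed helper on the PCA-compressed
# features (kmax=10 is an arbitrary choice)
wss = calculate_WSS(compressed, kmax=10)
print('WSS per k: {}'.format(wss))  # look for the elbow to pick n_clusters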