Example #1
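# NOTE: this function is excerpted from a larger module (a filter.py) and
# relies on module-level names. A plausible set of imports (the exact package
# paths are an assumption, not shown on this page):
#   import numpy
#   import vtk
#   from joblib import Parallel, delayed   # USE_PARALLEL flags their presence
#   from whitematteranalysis import fibers, similarity
# This variant uses Python 2 print statements and integer division.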
def anisotropic_smooth(inpd, fiber_distance_threshold, points_per_fiber=30, n_jobs=2, cluster_max=10):
    """Average nearby fibers.

    The pairwise fiber distance matrix is computed, then each fiber is
    averaged with its neighbors until an edge (> fiber_distance_threshold)
    is encountered or until cluster_max fibers have been merged.
    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers
    
    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))
        
    converged = False
    iteration_count = 0
    
    while not converged:
        print "<filter.py> ITERATION:", iteration_count, "SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count))
        print "<filter.py> number indices", len(curr_indices)
        
        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()
        
        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = range(0, current_fiber_array.number_of_fibers)

        # if the maximum number of fibers has been combined, stop averaging this fiber
        done[numpy.nonzero(numpy.array(curr_count) >= cluster_max)] = 1
        
        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                current_fiber_array.get_fiber(lidx),
                current_fiber_array,
                0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = \
                    similarity.fiber_distance(
                        current_fiber_array.get_fiber(lidx),
                        current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf
        
        # sort the pairwise distances. 
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)
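        # pair_order[i] is the flat index (into the flattened distance
        # matrix) of the i-th smallest pairwise distance; the row/column
        # fiber indices are recovered from it below with div and mod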

        print "<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]], \
            "DISTANCE COUNT:", distances.shape

        # if the smallest distance is greater or equal to the
        # threshold, we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print "<filter.py> CONVERGED"
            break
        else:
            print "<filter.py> NOT CONVERGED"
            
        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0
        
        # combine nearest-neighbor pairs (unless already done) until the distance threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise distance
            # use div and mod
            f_row = pair_idx // number_of_fibers  # floor division
            f_col = pair_idx % number_of_fibers

            # check if this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers (weighted by the number of
                # original fibers each one already represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col])/2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                #next_indices.append(list([curr_indices[f_row], curr_indices[f_col]]))
                next_indices.append(list(curr_indices[f_row] + curr_indices[f_col]))
                
            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any unvisited (already converged) fibers
        unvisited = numpy.nonzero(done == 0)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])
            
        # set up for next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for next iteration distance computation
        current_fiber_array = fibers.FiberArray()    
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [current_fiber_array.number_of_fibers, current_fiber_array.points_per_fiber]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        for curr_fidx, curr_fib in enumerate(curr_fibers):
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s

        print "<filter.py> SUM FIBER COUNTS:", numpy.sum(numpy.array(curr_count)), "SUM DONE FIBERS:", numpy.sum(done)
        print "<filter.py> MAX COUNT:" , numpy.max(numpy.array(curr_count)), "AVGS THIS ITER:", number_averages

    # when converged, convert output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers that each output fiber corresponds to
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input pd by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    for cluster_idx, index_list in enumerate(curr_indices):
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
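
Below is a minimal, hypothetical usage sketch for the function above. The file
name, the threshold value, and the choice of vtkXMLPolyDataReader are
assumptions for illustration, not part of the original snippet.

import vtk

# read a tractography polydata file (hypothetical name)
reader = vtk.vtkXMLPolyDataReader()
reader.SetFileName('tracts.vtp')
reader.Update()

# merge fibers whose pairwise Hausdorff distance is under 5.0 (an arbitrary
# value in the units of the input geometry)
outpd, counts, inpd, cluster_numbers, cluster_count = anisotropic_smooth(
    reader.GetOutput(), fiber_distance_threshold=5.0)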
Example #2
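# NOTE: as in Example #1, this snippet assumes module-level imports along the
# lines of numpy, vtk, joblib's Parallel/delayed (with USE_PARALLEL set), and
# the fibers/similarity modules; exact paths are an assumption. This variant
# uses Python 3 print functions.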
def anisotropic_smooth(inpd,
                       fiber_distance_threshold,
                       points_per_fiber=30,
                       n_jobs=2,
                       cluster_max=10):
    """ Average nearby fibers.
    
    The pairwise fiber distance matrix is computed, then fibers
    are averaged with their neighbors until an edge (>max_fiber_distance) is encountered.

    """

    # polydata to array conversion, fixed-length fiber representation
    current_fiber_array = fibers.FiberArray()
    current_fiber_array.points_per_fiber = points_per_fiber
    current_fiber_array.convert_from_polydata(inpd)
    original_number_of_fibers = current_fiber_array.number_of_fibers

    # fiber list data structure initialization for easy fiber averaging
    curr_count = list()
    curr_fibers = list()
    curr_indices = list()
    for lidx in range(0, current_fiber_array.number_of_fibers):
        curr_fibers.append(current_fiber_array.get_fiber(lidx))
        curr_count.append(1)
        curr_indices.append(list([lidx]))

    converged = False
    iteration_count = 0

    while not converged:
        print("<filter.py> ITERATION:", iteration_count, "SUM FIBER COUNTS:",
              numpy.sum(numpy.array(curr_count)))
        print("<filter.py> number indices", len(curr_indices))

        # fiber data structures for output of this iteration
        next_fibers = list()
        next_count = list()
        next_indices = list()

        # information for this iteration
        done = numpy.zeros(current_fiber_array.number_of_fibers)
        fiber_indices = list(range(0, current_fiber_array.number_of_fibers))

        # if the maximum number of fibers has been combined, stop averaging this fiber
        done[numpy.nonzero(numpy.array(curr_count) >= cluster_max)] = 1

        # pairwise distance matrix
        if USE_PARALLEL:
            distances = Parallel(n_jobs=n_jobs, verbose=1)(
                delayed(similarity.fiber_distance)(
                    current_fiber_array.get_fiber(lidx),
                    current_fiber_array, 0, 'Hausdorff')
                for lidx in fiber_indices)
            distances = numpy.array(distances)
        else:
            distances = numpy.zeros(
                (current_fiber_array.number_of_fibers,
                 current_fiber_array.number_of_fibers))
            for lidx in fiber_indices:
                distances[lidx, :] = \
                    similarity.fiber_distance(
                        current_fiber_array.get_fiber(lidx),
                        current_fiber_array, 0, 'Hausdorff')

        # distances to self are not of interest
        for lidx in fiber_indices:
            distances[lidx, lidx] = numpy.inf

        # sort the pairwise distances.
        distances_flat = distances.flatten()
        pair_order = numpy.argsort(distances_flat)

        print("<filter.py> DISTANCE MIN:", distances_flat[pair_order[0]], \
            "DISTANCE COUNT:", distances.shape)

        # if the smallest distance is greater or equal to the
        # threshold, we have converged
        if distances_flat[pair_order[0]] >= fiber_distance_threshold:
            converged = True
            print("<filter.py> CONVERGED")
            break
        else:
            print("<filter.py> NOT CONVERGED")

        # loop variables
        idx = 0
        pair_idx = pair_order[idx]
        number_of_fibers = distances.shape[0]
        number_averages = 0

        # combine nearest-neighbor pairs (unless already done) until the distance threshold is hit
        while distances_flat[pair_idx] < fiber_distance_threshold:
            # find the fiber indices corresponding to this pairwise distance
            # use div and mod
            f_row = pair_idx // number_of_fibers  # floor division: '/' would yield a float index in Python 3
            f_col = pair_idx % number_of_fibers

            # check if this neighbor pair can be combined
            combine = (not done[f_row]) and (not done[f_col])
            if combine:
                done[f_row] += 1
                done[f_col] += 1
                # weighted average of the fibers (weighted by the number of
                # original fibers each one already represents)
                next_fibers.append(
                    (curr_fibers[f_row] * curr_count[f_row] +
                     curr_fibers[f_col] * curr_count[f_col]) /
                    (curr_count[f_row] + curr_count[f_col]))
                # this was the regular average
                #next_fibers.append((curr_fibers[f_row] + curr_fibers[f_col])/2)
                next_count.append(curr_count[f_row] + curr_count[f_col])
                number_averages += 1
                #next_indices.append(list([curr_indices[f_row], curr_indices[f_col]]))
                next_indices.append(
                    list(curr_indices[f_row] + curr_indices[f_col]))

            # increment for the loop
            idx += 1
            pair_idx = pair_order[idx]

        # copy through any unvisited (already converged) fibers
        unvisited = numpy.nonzero(done == 0)[0]
        for fidx in unvisited:
            next_fibers.append(curr_fibers[fidx])
            next_count.append(curr_count[fidx])
            next_indices.append(curr_indices[fidx])

        # set up for next iteration
        curr_fibers = next_fibers
        curr_count = next_count
        curr_indices = next_indices
        iteration_count += 1

        # set up array for next iteration distance computation
        current_fiber_array = fibers.FiberArray()
        current_fiber_array.number_of_fibers = len(curr_fibers)
        current_fiber_array.points_per_fiber = points_per_fiber
        dims = [
            current_fiber_array.number_of_fibers,
            current_fiber_array.points_per_fiber
        ]
        # fiber data
        current_fiber_array.fiber_array_r = numpy.zeros(dims)
        current_fiber_array.fiber_array_a = numpy.zeros(dims)
        current_fiber_array.fiber_array_s = numpy.zeros(dims)
        for curr_fidx, curr_fib in enumerate(curr_fibers):
            current_fiber_array.fiber_array_r[curr_fidx] = curr_fib.r
            current_fiber_array.fiber_array_a[curr_fidx] = curr_fib.a
            current_fiber_array.fiber_array_s[curr_fidx] = curr_fib.s

        print("<filter.py> SUM FIBER COUNTS:",
              numpy.sum(numpy.array(curr_count)), "SUM DONE FIBERS:",
              numpy.sum(done))
        print("<filter.py> MAX COUNT:", numpy.max(numpy.array(curr_count)),
              "AVGS THIS ITER:", number_averages)

    # when converged, convert output to polydata
    outpd = current_fiber_array.convert_to_polydata()

    # color output by the number of fibers that each output fiber corresponds to
    outcolors = vtk.vtkFloatArray()
    outcolors.SetName('FiberTotal')
    for count in curr_count:
        outcolors.InsertNextTuple1(count)
    outpd.GetCellData().SetScalars(outcolors)

    # also color the input pd by output cluster number
    cluster_numbers = numpy.zeros(original_number_of_fibers)
    cluster_count = numpy.zeros(original_number_of_fibers)
    for cluster_idx, index_list in enumerate(curr_indices):
        indices = numpy.array(index_list).astype(int)
        cluster_numbers[indices] = cluster_idx
        cluster_count[indices] = curr_count[cluster_idx]
    outclusters = vtk.vtkFloatArray()
    outclusters.SetName('ClusterNumber')
    for cluster in cluster_numbers:
        outclusters.InsertNextTuple1(cluster)
    inpd.GetCellData().AddArray(outclusters)
    inpd.GetCellData().SetActiveScalars('ClusterNumber')

    return outpd, numpy.array(curr_count), inpd, cluster_numbers, cluster_count
Example #3
# NOTE: this snippet is truncated in the source: it begins mid-function and is
# dedented here for readability. It assumes earlier definitions of
# y_test_event_label_array, y_regression_pred1_event_labels, OUTPUT_EVENT_NUM,
# main_output_dir, and TAGS, plus 'import pandas as pd'.
TOTAL_PREDS = y_test_event_label_array.shape[0]

# reshape the flat label arrays to one row per sample, one column per event
y_test_event_label_array = y_test_event_label_array.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))
y_regression_pred1_event_labels = y_regression_pred1_event_labels.reshape(
    (TOTAL_PREDS // OUTPUT_EVENT_NUM, OUTPUT_EVENT_NUM))

# y_regression_pred1_event_labels = np.asarray(y_regression_pred1_event_labels)
print(y_regression_pred1_event_labels)
print("y_regression_pred1_event_labels shape")
print(y_regression_pred1_event_labels.shape)

# save the predictions alongside the ground truth
pred_data = {
    "prediction": list(y_regression_pred1_event_labels.flatten()),
    "actual": list(y_test_event_label_array.flatten()),
}

# build the predictions DataFrame
pred_df = pd.DataFrame(data=pred_data)
print(pred_df)

# write the data to CSV
output_fp = main_output_dir + "predictions-vs-ground-truth.csv"
pred_df.to_csv(output_fp, index=False)
print(output_fp)

for TAG in TAGS:
    ...  # loop body truncated in the source snippet
Example #4
# NOTE: this snippet is also truncated: it begins inside an (unshown) loop over
# recordings. It assumes earlier definitions of data, events, lag, fs,
# num_cores, total_events, and rnns, plus the spir module and joblib's
# Parallel/delayed.
        # build one covariance structure per event, in parallel (spir.build_cov)
        rnn = Parallel(n_jobs=num_cores)(
            delayed(spir.build_cov)(data, [event], lag, fs)
            for event in events)
        rnns += rnn

    # Limit to 2000 events
    if total_events > 2000:
        break

# -
# ## Compress Rnns
# +
tmp = list()
for rnn in rnns:
    tmp.append(rnn.flatten() / np.sum(np.diag(rnn)))  # normalize each matrix by its trace
pca = PCA(0.99)  # keep enough components to explain 99% of the variance
compressed = pca.fit_transform(tmp)  # fits and transforms in one step
print('Number of compressed components: {}'.format(compressed.shape[1]))


# -
# ## Perform K-means clustering
# +
## Find n-clusters
def calculate_WSS(points, kmax):
    # the source snippet is cut off below; this completion is a sketch of the
    # standard elbow-method loop
    sse = []
    for k in range(1, kmax + 1):
        kmeans = KMeans(n_clusters=k).fit(points)
        centroids = kmeans.cluster_centers_
        # total squared distance of each point to its assigned centroid
        sse.append(np.sum((points - centroids[kmeans.predict(points)]) ** 2))
    return sse
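
# A short, hypothetical usage of the completed helper on the PCA-compressed
# features (kmax=10 is an arbitrary choice)
wss = calculate_WSS(compressed, kmax=10)
print('WSS per k: {}'.format(wss))  # look for the elbow to pick n_clusters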