import multiprocessing

import numpy as np

# Note: utils (providing chunks() and flatten()) and get_all_subjects() are assumed to be
# imported at module level from the surrounding package.


def get_cv_fold(fold, dataset="HCP"):
    '''
    We need a train-validate-test split because of best-model selection and because of
    training of the combined net.

    :return:
    '''
    # For CV
    if fold == 0:
        train, validate, test = [0, 1, 2], [3], [4]
        # train, validate, test = [0, 1, 2, 3, 4], [3], [4]
    elif fold == 1:
        train, validate, test = [1, 2, 3], [4], [0]
    elif fold == 2:
        train, validate, test = [2, 3, 4], [0], [1]
    elif fold == 3:
        train, validate, test = [3, 4, 0], [1], [2]
    elif fold == 4:
        train, validate, test = [4, 0, 1], [2], [3]

    subjects = get_all_subjects(dataset)

    if dataset.startswith("HCP"):
        # subjects = list(utils.chunks(subjects[:100], 10))  # 10 folds
        subjects = list(utils.chunks(subjects, 21))  # 5 folds of 21 subjects each
        # => 5-fold CV is ok: the score is only 1 %-point worse than with 10 folds (80 vs 60 train subjects);
        #    10-fold CV is impractical.
    elif dataset.startswith("Schizo"):
        # 410 subjects
        subjects = list(utils.chunks(subjects, 82))  # 5 folds of 82 subjects each
    else:
        raise ValueError("Invalid dataset name")

    subjects = np.array(subjects)
    return list(subjects[train].flatten()), list(subjects[validate].flatten()), list(subjects[test].flatten())
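# Illustrative sketch (an assumption, not the package's own implementation) of the chunking
# behaviour relied on above: utils.chunks() is taken to split the subject list into consecutive,
# fixed-size pieces, e.g. 105 HCP subjects -> 5 folds of 21 subjects each.
def _chunks_sketch(lst, chunk_size):
    for i in range(0, len(lst), chunk_size):
        yield lst[i:i + chunk_size]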
def get_cv_fold(fold, dataset="HCP"):
    if dataset == "HCP_all":
        subjects = get_all_subjects(dataset)
        cut_point = int(len(subjects) * 0.9)
        return subjects[:cut_point], subjects[cut_point:], ["599671", "599469"]
    elif dataset == "biobank_20k":
        subjects = get_all_subjects(dataset)
        cut_point = int(len(subjects) * 0.9)
        return subjects[:cut_point], subjects[cut_point:], ["1000013", "1000013"]
    else:
        if fold == 0:
            train, validate, test = [0, 1, 2], [3], [4]
        elif fold == 1:
            train, validate, test = [1, 2, 3], [4], [0]
        elif fold == 2:
            train, validate, test = [2, 3, 4], [0], [1]
        elif fold == 3:
            train, validate, test = [3, 4, 0], [1], [2]
        elif fold == 4:
            train, validate, test = [4, 0, 1], [2], [3]

        subjects = get_all_subjects(dataset)

        if dataset.startswith("HCP"):
            subjects = list(utils.chunks(subjects, 21))  # 5 folds of 21 subjects each
            # 5-fold CV is ok: the score is only 1 %-point worse than with 10 folds (80 vs 60 train subjects);
            # 10-fold CV is impractical.
        elif dataset.startswith("Schizo"):
            # ~410 subjects
            subjects = list(utils.chunks(subjects, 82))  # 5 folds of 82 subjects each
        else:
            raise ValueError("Invalid dataset name")

        subjects = np.array(subjects)
        return list(subjects[train].flatten()), list(subjects[validate].flatten()), list(subjects[test].flatten())
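# Usage sketch (hypothetical helper; assumes get_all_subjects() can resolve the requested
# dataset in this environment). Shows how the five CV folds rotate through the subject chunks.
def _print_cv_fold_sizes(dataset="HCP"):
    for fold in range(5):
        train, validate, test = get_cv_fold(fold, dataset=dataset)
        print("fold {}: {} train / {} validate / {} test subjects".format(
            fold, len(train), len(validate), len(test)))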
def compress_streamlines(streamlines, error_threshold=0.1, nr_cpus=-1):
    import psutil

    if nr_cpus == -1:
        nr_processes = psutil.cpu_count()
    else:
        nr_processes = nr_cpus

    number_streamlines = len(streamlines)
    if nr_processes >= number_streamlines:
        nr_processes = number_streamlines - 1
    if nr_processes < 1:
        nr_processes = 1

    chunk_size = int(number_streamlines / nr_processes)
    if chunk_size < 1:
        # logging.warning("\nReturning early because chunk_size=0")
        return streamlines
    fiber_batches = list(utils.chunks(streamlines, chunk_size))

    # The worker reads the batches and the threshold via these module-level globals
    # instead of receiving them as arguments.
    global _COMPRESSION_ERROR_THRESHOLD
    global _FIBER_BATCHES
    _COMPRESSION_ERROR_THRESHOLD = error_threshold
    _FIBER_BATCHES = fiber_batches

    # logging.debug("Main program using: {} GB".format(round(utils.mem_usage(print_usage=False), 3)))

    pool = multiprocessing.Pool(processes=nr_processes)

    # Do not pass the data to the workers (that would double the amount of memory needed); pass only the index
    # of each batch in the shared module-level state. This needs only as much memory as the single-threaded
    # version (only the main process holds the data, the workers almost none) and is also faster (around 20-30%).
    # Needed because otherwise there are memory problems when processing the raw tracking output
    # (>10 GB on disk and >20 GB in memory).
    result = pool.map(compress_fibers_worker_shared_mem, range(0, len(fiber_batches)))

    pool.close()
    pool.join()

    streamlines_c = utils.flatten(result)
    return streamlines_c
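# Usage sketch (hypothetical helper; assumes streamlines is a list of Nx3 coordinate arrays,
# e.g. the raw tracking output loaded elsewhere, and that the worker
# compress_fibers_worker_shared_mem is defined in this module). Compresses with the default
# error threshold of 0.1 using all available CPUs.
def _compress_tracking_output_sketch(streamlines):
    return compress_streamlines(streamlines, error_threshold=0.1, nr_cpus=-1)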