def bin_data(cnn_output, training_data_src, size = (3, 12, 12), cutoff = 15, pad = True, verbose = False, collect_cnn = False, maxip=0):
    '''Collect connected components from CNN output and bin them into true positives and false positives based on labels.

    cnn_output = pth/list of folders containing CNN output
    training_data_src = pth/list of folders containing numpy arrays [c,z,y,x]
                    c = 0: raw data, c=1: nonzero pixels representing ground truth centers.
                    Each array must be named like its cnn_output entry plus a 4-character extension ('.npy').
    size = used in get_pixels_around_center; distance from point in zyx.
        Note: this is effectively a radius (NOT DIAMETER).
    cutoff = allowable distance (IN PIXELS) for a True positive when considering ground truth centers with centers of mass of cnn-labelled connected components
    pad = (optional) important for edge cases in training set (i.e. points that don't have sufficient border around them)
        True: if pnt is on edge of image, function pads evenly
        False: if pnt is on edge of image, drop
    verbose = (optional) if True, write progress to stdout
    collect_cnn (optional): if true keep these data for training as well
    maxip = int, number of maxips to ravel into data

    Volumes present in both inputs are processed in sorted name order. If pairing or
    pixel extraction raises for a volume, the error is printed, that volume is skipped
    and processing continues with the next one.

    return (as documented by the original author):
        {'true_positives': tps, 'false_positives': fps, 'ground_truths': gts}
        NOTE(review): no return statement is present in the visible part of this
        function - confirm against the complete file.
    '''
    cnn_output = listdirfull(cnn_output) if isinstance(cnn_output, str) else cnn_output
    training_data_src = listdirfull(training_data_src) if isinstance(training_data_src, str) else training_data_src

    #names shared by both inputs; sorted so that the processing order is reproducible
    training_names = set(os.path.basename(xx[:-4]) for xx in training_data_src)
    cnn_names = set(os.path.basename(xx) for xx in cnn_output)
    intersection = sorted(training_names.intersection(cnn_names))

    if verbose: print('Collect cnn == {}'.format(collect_cnn))
    tps = []; fps = []; gts = []
    for pth in intersection:
        if verbose: sys.stdout.write('Starting {}'.format(pth))

        #load raw and gts
        data = load_np(os.path.join(os.path.dirname(training_data_src[0]), pth+'.npy'))
        raw = data[0]
        ground_truth = data[1]
        gt = np.asarray(np.nonzero(ground_truth)).T

        #get labels and pair based on distance
        cnn_pth = os.path.join(os.path.dirname(cnn_output[0]), pth)
        centers = probabilitymap_to_centers_thresh(cnn_pth, threshold = 1, numZSlicesPerSplit=250, overlapping_planes = 40, cores = 4, verbose = verbose)
        try:
            #only the pairing is used; the tp/fp/fn counts for this cutoff are discarded
            paired, _, _, _ = pairwise_distance_metrics_multiple_cutoffs(gt, centers, verbose=False, return_paired=True, cutoffs=[cutoff])[0]

            #optional
            cnn_src = cnn_pth if collect_cnn else False

            #true positives: detected centers that were paired with a ground truth point
            TP = [centers[xx[1]] for xx in paired]
            TPS = get_pixels_around_center(np.asarray(TP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)

            #false positives: every detected center that was not paired
            FP = np.asarray(list(set(centers).difference(set(TP))))
            FPS = get_pixels_around_center(np.asarray(FP).astype('int'), raw, cnn_src=cnn_src, size=size, pad=pad, maxip=maxip)

            #append
            tps.append(TPS); fps.append(FPS); gts.append(gt)
        except Exception as e:
            #report first, then move on to the next volume rather than aborting the whole run
            print('\n\n\nSkipping {}, due to error: {}\n\n\n'.format(pth, e))
            continue
        # NOTE(review): the names used from here on (dst, point_lst, id_table,
        # converted_points, dst1, qc_overlay_transform_type, cellvol) are not assigned
        # anywhere in the visible enclosing function - this section looks like it was
        # spliced in from a different routine; confirm against the original file.

        #zmd added 20190312 - these should be in order of points inputted from raw space
        # Persist point_lst as <dst>/annotation_pixel_value_coordinates.npy.
        np.save(os.path.join(dst, "annotation_pixel_value_coordinates.npy"),
                point_lst)

        # Build a per-structure count table (presumably a pandas DataFrame, given the
        # .fillna/.to_csv calls), replace missing values with 0 and write it to a CSV
        # named after id_table with the '.xlsx' suffix removed.
        df = count_structure_lister(id_table, *point_lst).fillna(0)
        df.to_csv(
            os.path.join(
                dst,
                os.path.basename(id_table).replace('.xlsx', '') +
                '_with_anatomical_assignment_of_cell_counts.csv'))

        #load and convert to single voxel loc
        # Each point's first three coordinates are truncated to int and rendered as the
        # string of a 3-tuple, so that points landing in the same voxel compare equal.
        zyx = np.asarray([
            str((int(xx[0]), int(xx[1]), int(xx[2])))
            for xx in load_np(converted_points)
        ])
        # voxel string -> number of points that fell in that voxel
        zyx_cnt = Counter(zyx)

        #manually call transformix..
        transformed_dst = os.path.join(dst1, 'transformed_points')
        makedir(transformed_dst)
        if qc_overlay_transform_type == 'all':
            # First 'TransformParameters.0.txt' path whose name contains 'sig_to_reg'
            # but not 'regtoatlas'; the trailing [0] raises IndexError if none match.
            tp0 = [
                xx
                for xx in listall(os.path.dirname(cellvol.ch_to_reg_to_atlas),
                                  'TransformParameters.0.txt')
                if 'sig_to_reg' in xx and 'regtoatlas' not in xx
            ][0]
            tp1 = [
                xx
def apply_random_forest(classifier, raw_src, cnn_src, collect_cnn = False, size = (3,12,12), pad=False, cores=4, numZSlicesPerSplit=300, overlapping_planes = 20, chunks=10, maxip=0):
    ''' THIS IS MEMORY INEFFICIENT - SEE random_forest.py for better functions

    Find candidate centers in the CNN output, classify the pixels around each one with
    the random forest and keep only the centers predicted as class 1.

    classifier = pretrained random forest or path to pretrained random forest (loaded with joblib)
    raw_src = raw input data, one of:
        path ending in '.npy' (loaded with load_np)
        path ending in '.tif' (loaded with tifffile.imread)
        any other path, treated as a folder of tiffs and processed in the memory efficient way
        numpy array / memmap already in memory
    cnn_src = folder of tiffs from cnn output
    collect_cnn = optional to include cnn data for random forest input
    size = passed to the pixel collection functions; distance from each center in zyx
    pad = if True, pad the edges of objects determined. False: remove edge cases, usually better since they will be few in number
    cores = number of cores for parallelization, larger the number the less memory efficient
    numZSlicesPerSplit: chunk of zplanes to process at once. Adjust this and cores based on memory constraints.
    overlapping_planes: number of planes on each side to overlap by, this should be a comfortable amount larger than the maximum z distances of a single object
    chunks = number of chunks to divide connected components by. The larger the number the more memory efficiency, but a bit more IO required
    maxip = passed through to the pixel collection functions

    Returns a dictionary consisting of k=centers, v=corresponding pixel indices determined by CNN.
    An empty dictionary is returned when there is nothing to classify.
    Raises TypeError if raw_src is neither a path nor a numpy array.

    TO DO - MAKE SURE MAPPING STAYS THE SAME AND ORDER IS NOT LOST

    Example inputs:
    classifier = '/home/wanglab/wang/pisano/Python/lightsheet/supp_files/h129_rf_classifier.pkl'
    cnn_src = '/home/wanglab/wang/pisano/conv_net/annotations/better_res/h129_memmap_arrays_cnn_output/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00_z200-400_y1350-1700_x3100-3450'
    raw_src =  load_np('/home/wanglab/wang/pisano/conv_net/annotations/better_res/h129_memmap_arrays/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00_z200-400_y1350-1700_x3100-3450.npy')[0]
    '''
    rf = joblib.load(classifier) if isinstance(classifier, str) else classifier

    #load and find centers from cnn
    center_pixels_dct = probabilitymap_to_centers_thresh(cnn_src, threshold = 1, numZSlicesPerSplit=numZSlicesPerSplit, overlapping_planes = overlapping_planes, cores = cores, return_pixels = True, verbose = False)

    #optional
    if not collect_cnn: cnn_src = False

    #arguments shared by every pixel collection call below
    common = dict(pnts=list(center_pixels_dct.keys()), cnn_src=cnn_src, size=size, pad=pad, return_pairs=True, maxip=maxip)

    #load and collect pixels - if path to folder of tiffs will be done in memory efficient way
    if isinstance(raw_src, str):
        #test the file extension (the LAST four characters) to pick a loader
        if raw_src.endswith('.npy'):
            inn = get_pixels_around_center(src=load_np(raw_src), **common)
        elif raw_src.endswith('.tif'):
            inn = get_pixels_around_center(src=tifffile.imread(raw_src), **common)
        else:
            inn = get_pixels_around_center_mem_eff(src=raw_src, cores=cores, chunks=chunks, **common)
    elif isinstance(raw_src, np.ndarray):
        #np.memmap is a subclass of np.ndarray, so this covers both
        inn = get_pixels_around_center(src=raw_src, **common)
    else:
        raise TypeError('raw_src must be a path or a numpy array, not {}'.format(type(raw_src)))

    #nothing to classify (no centers found, or all of them dropped as edge cases)
    if len(inn) == 0:
        return {}

    #predict - fix the key order once so that features and centers stay aligned
    kept_keys = list(inn.keys())
    out = rf.predict([inn[k] for k in kept_keys])

    #remove centers that are considered false positives
    centers = np.asarray([k for k, label in zip(kept_keys, out) if label == 1])

    #remove non determined centers from above
    return {tuple(c): center_pixels_dct[tuple(c)] for c in centers}
    # NOTE(review): at this indentation the statements below follow the `return` of the
    # preceding function and therefore never run; they read like a separate
    # train / plot / apply driver script. Several names used here (cnn_output,
    # training_data_src, cutoff, verbose, balance, kfold_splits, dst) are not
    # parameters of that function - confirm where this code really belongs.

    #parse
    # bin_data's result is indexed as a dict with the three keys read on the next line.
    dct = bin_data(cnn_output, training_data_src, size=size, cutoff = cutoff, pad = pad, verbose=verbose, maxip=maxip)
    tps = dct['true_positives']; fps = dct['false_positives']; gts = dct['ground_truths']

    #train
    # The result is reused below both as keyword arguments for plot_roc and as a
    # mapping holding the fitted model under 'classifier'.
    kwargs = train_random_forest(tps, fps, n_estimator = n_estimator, max_depth = max_depth, balance = balance, cores = cores, kfold_splits = kfold_splits, dst = dst, average=precision_score)

    #plot
    #%matplotlib inline
    save = '/home/wanglab/Downloads/rf'
    plot_roc(save=save, **kwargs)

    #apply
    # Use the first (alphabetically sorted) CNN output and the first training array.
    cnn_src = listdirfull(cnn_output); cnn_src.sort(); cnn_src = cnn_src[0]
    inn = listdirfull(training_data_src); inn.sort();
    # index 0 of the loaded array is taken as the raw data, index 1 as the ground truth
    raw_src = load_np(inn[0])[0]
    gt = load_np(inn[0])[1]
    centers = apply_random_forest(kwargs['classifier'], raw_src, cnn_src, size = (7,25,25))

    #show
    from tools.conv_net.functions.dilation import dilate_with_element, ball
    # Dilate the ground truth and a volume marking the kept centers with ball(5),
    # presumably so that single-voxel marks become visible - TODO confirm.
    gt = dilate_with_element(gt, ball(5))
    src = np.zeros_like(gt)
    # NOTE(review): apply_random_forest as defined in this file returns a dict, which
    # has no .astype - this loop appears to expect an array of centers; confirm.
    for c in centers.astype('int'):
        src[c[0],c[1],c[2]] = 1
    src = dilate_with_element(src, ball(5))

    #Sweep: <-- usually performance is not affected that much by this
    # Only the parameter combination is printed in the visible part of the sweep.
    for n_estimator in (10,20,50,100):
        for max_depth in (5,10,20,50,100):
            print('\n\n n_estimator--{}, max_depth--{}'.format(n_estimator, max_depth))
Example #5
0
 # NOTE(review): fragment of a larger routine whose header is not visible here; dst0,
 # fld, dst, cellvol, kwargs, verbose and generate_downsized_overlay come from that
 # context. The syntax (`except Exception, e`, `print e`, `iteritems`) is Python 2 only.

 # Folder for elastix output, created under dst0.
 dst1 = os.path.join(dst0, 'elastix'); makedir(dst1)
 
 #####check cell detection (modeled from lightsheet/tools/registration/transform_cell_counts)
 #3dunet cell dataframe
 # Reads the first entry listdirfull returns for the pooled_cell_measures folder
 # (presumably filtered by the '.csv' keyword - TODO confirm).
 dataframe = pd.read_csv(listdirfull(os.path.join(fld, '3dunet_output/pooled_cell_measures'), '.csv')[0])
 
 #####generate a downsized version######
 if generate_downsized_overlay:
     cellvolloaded = tifffile.imread(cellvol.resampled_for_elastix_vol)
     # zero-filled volume with the same shape and dtype, used to mark detected cells
     cnn_cellvolloaded = np.zeros_like(cellvolloaded)
     zyx = dataframe[['z','y','x']].values
     #adjust for reorientation THEN rescaling, remember full size data needs dimension change relative to resample
     fullsizedimensions = get_fullsizedims_from_kwargs(kwargs) #don't get from kwargs['volumes'][0].fullsizedimensions it's bad! use this instead
     zyx = fix_contour_orientation(zyx, verbose=verbose, **kwargs) #now in orientation of resample
     # NOTE(review): the resampled volume is read from disk a second time here only to
     # obtain its shape, although cellvolloaded already holds it.
     zyx = points_resample(zyx, original_dims = fix_dimension_orientation(fullsizedimensions, **kwargs), resample_dims = tifffile.imread(cellvol.resampled_for_elastix_vol).shape, verbose = verbose)[:, :3]
     # Truncate each point to integer voxel coordinates and render it as the string of
     # a 3-tuple so that points sharing a voxel can be counted together.
     # NOTE(review): load_np is handed the in-memory result of points_resample rather
     # than a path - confirm that load_np accepts arrays.
     zyx = np.asarray([str((int(xx[0]), int(xx[1]), int(xx[2]))) for xx in load_np(zyx)])
     from collections import Counter
     # voxel string -> number of points in that voxel
     zyx_cnt = Counter(zyx)
     #now overlay
     # The loop variable reuses (and overwrites) the name zyx.
     for zyx,v in zyx_cnt.iteritems():
         # parse the '(z, y, x)' string back into three ints
         z,y,x = [int(xx) for xx in zyx.replace('(','',).replace(')','').split(',')]
         try:
             # voxel value is 100 times the number of points that landed on it
             cnn_cellvolloaded[z,y,x] = v*100
         except Exception, e:
             # any failure for a single voxel is printed and the loop carries on
             print e
     # Stack (cell marks, raw volume, zeros) along a new last axis, then swap axes 0 and 2.
     merged = np.stack([cnn_cellvolloaded, cellvolloaded, np.zeros_like(cellvolloaded)], -1)
     merged = np.swapaxes(merged, 0,2)#reorient to horizontal
     tifffile.imsave(os.path.join(dst, '{}_points_merged_resampled_for_elastix.tif'.format(os.path.basename(fld))), merged)         
 
 #EXAMPLE USING LIGHTSHEET - assumes marking centers in the 'raw' full sized cell channel. This will transform those centers into "atlas" space (in this case the moving image)
 #in this case the "inverse" transform has the atlas as the moving image in the first step, and the autofluorescence channel as the moving image in the second step 
Example #6
0
    # NOTE(review): fragment of a larger script/routine whose header is not visible here.
    #make 200,350,350
    # Chunk boundaries per axis: consecutive values of each range delimit one chunk.
    # range(200, 600, 200) yields 200 and 400, i.e. a single z interval.
    zrng = range(200, 600, 200)  #at least a delta of 100
    yrng = range(1000, 5000, 350)
    xrng = range(1000, 5000, 350)
    # src is reassigned repeatedly; only the LAST assignment takes effect. The earlier
    # ones look like a list of alternative datasets kept for manual switching.
    src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170204_tp_bl6_cri_1000r_02/full_sizedatafld/20170204_tp_bl6_cri_1000r_02_1hfds_647_0010na_25msec_z7d5um_10povlap_ch00'
    src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170116_tp_bl6_lob7_500r_09/full_sizedatafld/20170116_tp_bl6_lob7_500r_09_647_010na_z7d5um_75msec_10povlp_ch00'
    src = '/home/wanglab/wang/pisano/tracing_output/antero_4x/20170130_tp_bl6_sim_1750r_03/full_sizedatafld/20170130_tp_bl6_sim_1750r_03_647_010na_1hfds_z7d5um_50msec_10povlp_ch00'
    src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_10/full_sizedatafld/20180215_jg_bl6f_prv_10_647_010na_z7d5um_250msec_10povlap_ch00'
    src = '/home/wanglab/wang/pisano/tracing_output/retro_4x/20180215_jg_bl6f_prv_07/full_sizedatafld/20180215_jg_bl6f_prv_07_647_010na_z7d5um_250msec_10povlap_ch00'
    # Scratch location named after the source folder.
    dst = os.path.join('/home/wanglab/Downloads/', os.path.basename(src))
    makedir(dst)
    # Sorted list of the '.tif' entries in src, combined into '<dst>.npy'
    # (presumably a memory-mapped array, going by the helper's name - TODO confirm).
    lst = listdirfull(src, keyword='.tif')
    lst.sort()
    make_memmap_from_tiff_list(lst, dst + '.npy')

    arr = load_np(dst + '.npy')
    # NOTE(review): makedir is called again on the same dst as above.
    makedir(dst)
    # From here on dst is the annotations folder that receives the chunks.
    dst = os.path.join(
        '/home/wanglab/wang/pisano/conv_net/annotations/better_res',
        os.path.basename(src))
    makedir(dst)
    # Walk every (z, y, x) combination of consecutive boundary pairs; (z, zz),
    # (y, yy) and (x, xx) are the start/stop of the current chunk on each axis.
    for i in range(len(zrng) - 1):
        for ii in range(len(yrng) - 1):
            for iii in range(len(xrng) - 1):
                z, zz = zrng[i], zrng[i + 1]
                y, yy = yrng[ii], yrng[ii + 1]
                x, xx = xrng[iii], xrng[iii + 1]
                tifffile.imsave(os.path.join(
                    dst, '{}_z{}-{}_y{}-{}_x{}-{}.tif'.format(
                        os.path.basename(src), z, zz, y, yy, x, xx)),
                                arr[z:zz, y:yy, x:xx],