예제 #1
0
def main():
    """Run puncta detection and co-localisation analysis on every replicate.

    The hard-coded ``parent_path`` is parsed with ``methods.parse_tree`` for
    replicates whose base file carries the ``.nd`` extension.  For each
    replicate the images are loaded and nuclei are located through the
    ``methods`` helpers.  For the ``'ch488'`` channel the image is blurred,
    puncta are thresholded, the puncta count is recorded, intensities of the
    ``'ch561'`` image at the puncta are collected for HP1 / Med1 samples, and
    the result of ``methods.manders`` is printed for nine randomised masks
    (as a mean) and for the real puncta mask.

    Side effects:
        * creates ``output_path`` if it does not exist;
        * rewrites ``num_puncta_per_channel_cisplatin.csv`` (tab separated,
          in the current working directory) after every replicate;
        * saves ``histogram_hp1_allimgs.png`` and
          ``histogram_med1_allimgs.png`` in the current working directory.
    """
    # user input
    input_params = SimpleNamespace()

    # Make adjustments here:
    input_params.parent_path = '/Users/swilson/Documents/Grad_Folder2018/young_lab_rotation/Fiji-Images/test_data_3D'
    input_params.output_path = '/Users/swilson/Documents/Grad_Folder2018/young_lab_rotation/Fiji-Images/output_3D'
    file_extension = '.nd'

    # makedirs also creates missing parent directories and tolerates an
    # already existing output directory.
    os.makedirs(input_params.output_path, exist_ok=True)

    # parse directory of data and run analysis on each replicate
    data_files = methods.parse_tree(input_params.parent_path, file_extension)

    npuncta_list = []
    exp_list = []
    intensities_hp1 = []
    intensities_med1 = []
    for condition, rep_files in data_files.items():
        for rep in rep_files:  # REPLICATES
            data = SimpleNamespace()
            base_file = [f for f in rep if file_extension in f][0]
            data.rep_name = base_file.replace(file_extension, '')
            data.condition = condition
            data.rep_files = rep

            data = methods.load_images(data, input_params)
            data = methods.find_nucleus_3D(data, input_params)

            for channel in data.pro_imgs:

                print(data.rep_name)
                print(channel)

                ###    Second Attempt   ###
                if channel == 'ch488':
                    reference_img = data.pro_imgs['ch561']

                    # The blur used to be applied only when the sample name
                    # contained 'HP1', 'Med1' or 'fib1' (with identical code
                    # in each branch); any other name reused the previous
                    # replicate's image or raised NameError.
                    blur_img = methods.gaussian_blur(data.pro_imgs[channel])

                    puncta_labels, npuncta, puncta, puncta_mask = methods.threshold_puncta(
                        blur_img, data, input_params, channel)
                    npuncta_list.append(npuncta)
                    exp_list.append(data.rep_name + "_" + channel)

                    # Intensities are pooled per marker; samples that are
                    # neither HP1 nor Med1 are not measured.
                    if 'HP1' in data.rep_name:
                        intensity_pool = intensities_hp1
                    elif 'Med1' in data.rep_name:
                        intensity_pool = intensities_med1
                    else:
                        intensity_pool = None
                    if intensity_pool is not None:
                        intensity_pool.append(
                            methods.intensity_at_puncta(
                                puncta_labels, reference_img, npuncta, data,
                                input_params))

                    # Nine randomised puncta masks, then the real mask.
                    manders_list = []
                    for i in range(1, 10):
                        print(i)
                        random_puncta_mask = methods.random_regions(
                            npuncta, puncta, reference_img, data,
                            input_params)
                        manders_list.append(
                            methods.manders(random_puncta_mask, reference_img,
                                            npuncta, data, input_params))
                    print(np.mean(manders_list))
                    manders_protein = methods.manders(puncta_mask,
                                                      reference_img, npuncta,
                                                      data, input_params)
                    print(manders_protein)
                print('DONE')

                ## Colocalization analysis
                ## Two colors
                #no_backgrnd_img_488 = methods.subtract_median(data.pro_imgs['ch488'],data,input_params)
                #no_backgrnd_img_561 = methods.subtract_median(data.pro_imgs['ch561'],data,input_params)
                #methods.colocalize(no_backgrnd_img_488,no_backgrnd_img_561,data,input_params)#no_backgrnd_img_488,no_backgrnd_img_561,data,input_params)

            # NOTE(review): this records the 'ch488' puncta count a second
            # time, labelled with whichever channel was iterated last; it
            # also depends on `npuncta` / `channel` having been bound by the
            # loop above.  Kept as-is to preserve the CSV contents - confirm
            # whether the duplicate entry is intended.
            npuncta_list.append(npuncta)
            exp_list.append(data.rep_name + "_" + channel)

            # Rewritten after every replicate with everything gathered so
            # far; the context manager guarantees the handle is closed.
            with open("num_puncta_per_channel_cisplatin.csv", 'w') as f:
                f.write("".join(name + "\t" for name in exp_list))
                f.write("\n")
                f.write("".join(str(count) + "\t" for count in npuncta_list))

    num_bins = 50
    flattened_hp1 = [val for sublist in intensities_hp1 for val in sublist]
    plt.hist(flattened_hp1, num_bins, facecolor='blue', alpha=0.5)
    plt.savefig('histogram_hp1_allimgs' + '.png', dpi=300)
    plt.close()

    flattened_med1 = [val for sublist in intensities_med1 for val in sublist]
    plt.hist(flattened_med1, num_bins, facecolor='blue', alpha=0.5)
    plt.savefig("histogram_med1_allimgs" + '.png', dpi=300)
    plt.close()
예제 #2
0
        replicate_output = pd.DataFrame()
        input_params.replicate_count = 1
        for idx, file in enumerate(base_name_files):
            data = SimpleNamespace(
            )  # this is the session data object that will be passed to functions. Corresponds to one replicate

            sample_name = file.replace(file_ext, '')
            replicate_files = [
                os.path.join(input_params.parent_path, folder, r)
                for r in file_list if sample_name in r and os.path.isfile(
                    os.path.join(input_params.parent_path, folder, r))
            ]

            replicate_files = np.sort(replicate_files)

            data = methods.load_images(replicate_files, data, input_params,
                                       folder)
            data = methods.find_scaffold(data, input_params)
            data, rep_bulk, rep_total = methods.find_droplets(
                data, input_params)
            data = methods.measure_droplets(data, input_params, rep_bulk)
            replicate_output = replicate_output.append(data.replicate_output,
                                                       ignore_index=True)

            if len(bulk_sig) == 0:
                for c in data.channel_names:
                    bulk_sig[c] = [rep_bulk[c]]
                    total_sig[c] = [rep_total[c]]

            else:
                for c in data.channel_names:
                    bulk_sig[c].append(rep_bulk[c])
# Pick the image / label arrays and the results directory for the data set
# named by `whichdataset`.
# NOTE(review): `input` shadows the builtin of the same name; the name is kept
# because the code below reads it.
if whichdataset == 'York':
    input = np.load('YCMRI_128x128_images.npy')
    labels = np.load('YCMRI_128x128_labels.npy')
    path = 'York_results'

elif whichdataset == 'ACDC':
    # Location of the raw data and the file-name suffixes of the two frames
    # and of their ground-truth labels.
    data_dir = 'ACDC_dataset/training'
    raw_image_path01, raw_image_path12 = '_frame01.nii.gz', '_frame12.nii.gz'
    label_path01, label_path12 = '_frame01_gt.nii.gz', '_frame12_gt.nii.gz'
    path = 'ACDC_results'

    images_paths, labels_paths = methods.load_images(
        data_dir, raw_image_path01, raw_image_path12, label_path01,
        label_path12)
    # Sort both lists so each label sits at the same index as its
    # corresponding image.
    images_paths.sort()
    labels_paths.sort()

    img_data = methods.load_data(images_paths)

    # The arrays that are actually used come from the saved .npy files.
    input = np.load('unet_input.npy')
    labels = np.load('unet_labels.npy')

unet_input, unet_labels = [], []
#TODO variable amount of slices per person? (in percentages)
total_number_of_patients = len(input)
예제 #4
0
def main(data_path, nuc_idx=0, pro_idx=1, threshold=None):
    """Measure protein signal inside and outside dense nuclear objects.

    Every sub-directory of ``data_path`` that is not hidden and does not end
    in ``'output'`` is treated as a sample; every ``.czi`` file inside it is
    a replicate.  For each sufficiently large nuclear region the dense-object
    mask returned by ``methods.find_dense_objects_3D`` is used to compute, per
    protein channel, mean and total intensities inside the dense objects and
    in the remaining nuclear background.  Per sample the function writes
    ``<folder>_enrichment.xlsx``, ``<folder>_objects.xlsx`` and one
    ``<folder>_rep<N>.png`` per successfully loaded replicate into
    ``<output_path>/<folder>_output``.

    Args:
        data_path: directory that contains one sub-directory per sample.
        nuc_idx: stored as ``input_params.nuc_idx`` for the ``methods``
            helpers (presumably the nuclear channel index - confirm).
        pro_idx: stored as ``input_params.pro_idx`` for the ``methods``
            helpers (presumably the protein channel index - confirm).
        threshold: optional threshold; when given it is divided by 65536
            before being stored (presumably scaling a 16-bit value to a
            fraction - confirm).  ``None`` is passed through unchanged.
    """
    # Regions whose area (methods.find_region_area) is below this are skipped.
    min_region_area = 30000
    file_ext = '.czi'
    enrichment_columns = [
        'sample', 'replicate_id', 'nuc_id', 'total_nuc_voxels', 'channel',
        'mean_in', 'mean_out', 'norm_mean', 'total_in', 'total_out',
        'norm_total'
    ]
    object_columns = [
        'sample', 'replicate_id', 'nuc_id', 'object_id', 'voxels', 'channel',
        'mean_in', 'mean_out', 'norm_mean'
    ]

    # user input
    input_params = SimpleNamespace()
    input_params.parent_path = data_path
    input_params.nuc_idx = nuc_idx
    input_params.pro_idx = pro_idx
    input_params.threshold = (threshold / 65536
                              if threshold is not None else None)

    folder_list = sorted(os.listdir(input_params.parent_path))

    # make output directories
    #input_params.output_path = input_params.parent_path
    input_params.output_path = '/lab/solexa_young/scratch/MECP2_Imaging/20191112_neuron_imaging/Volumes_extra/gaussian-sigma3_mean2.35_struct3_dist0.2'

    # makedirs also creates missing parents of this nested path.
    os.makedirs(input_params.output_path, exist_ok=True)

    for folder in folder_list:  # SAMPLES/EXPERIMENTS
        folder_path = os.path.join(input_params.parent_path, folder)
        if (folder.startswith('.') or folder.endswith('output')
                or not os.path.isdir(folder_path)):
            continue

        print()
        print('Started: ', folder, ' at ', datetime.now())
        print()

        temp_output = os.path.join(input_params.output_path,
                                   folder + '_output')
        os.makedirs(temp_output, exist_ok=True)

        base_name_files = sorted(
            f for f in os.listdir(folder_path)
            if file_ext in f and os.path.isfile(os.path.join(folder_path, f)))

        # Rows are collected in plain lists and turned into DataFrames once
        # per sample: DataFrame.append was removed in pandas 2.0 and copied
        # the whole frame on every call.
        enrichment_rows = []
        object_rows = []

        # The replicate id advances for every file, including files that
        # fail to load.
        for replicate_count, file in enumerate(base_name_files,
                                               start=1):  # REPLICATES
            print()
            print(file)
            print()
            data = SimpleNamespace()
            data.sample_name = file.replace(file_ext, '')
            data.folder = folder
            data.img_path = os.path.join(folder_path, file)

            data = methods.load_images(data, input_params)
            if data is None:
                continue

            data = methods.find_nucleus_2D(data, input_params)
            data.z_count = data.nuc_img.shape[0]

            total_dense_object_mask = np.zeros(data.nuc_img.shape, dtype=bool)
            for region in data.nuc_regions:
                if methods.find_region_area(region) < min_region_area:
                    continue

                # Label found at the centre of the region's bounding box.
                center_r = int((region[0].stop + region[0].start) / 2)
                center_c = int((region[1].stop + region[1].start) / 2)
                nuc_id = data.nuc_label[center_r, center_c]

                nuc_box = data.nuc_img[:, region[0], region[1]]
                nuc_mask_box = data.nuc_label[region[0], region[1]]
                # because our nuclear mask is 2D so we project it to 3D
                single_nuc_mask = np.repeat(
                    (nuc_mask_box == nuc_id)[np.newaxis, :, :],
                    data.z_count,
                    axis=0)

                dense_obj_mask, bg_nuc_mask, dense_objects = methods.find_dense_objects_3D(
                    nuc_box, single_nuc_mask, input_params, data)
                total_dense_object_mask[:, region[0], region[1]][
                    dense_obj_mask] = True

                # Voxel count of the dense-object mask; identical for every
                # channel, so computed once per region.
                nuc_voxels = np.sum(dense_obj_mask)

                for p_idx, image in enumerate(data.pro_imgs):
                    channel_name = str(data.pro_ch_names[p_idx])
                    protein_box = image[:, region[0], region[1]]

                    mean_in = np.mean(protein_box[dense_obj_mask])
                    total_in = np.sum(protein_box[dense_obj_mask])

                    mean_out = np.mean(protein_box[bg_nuc_mask])
                    total_out = total_in + np.sum(protein_box[bg_nuc_mask])

                    enrichment_rows.append({
                        'sample': folder,
                        'replicate_id': replicate_count,
                        'nuc_id': nuc_id,
                        # This key used to be 'total_voxels', which left the
                        # declared 'total_nuc_voxels' column empty and added
                        # a stray extra column.
                        'total_nuc_voxels': nuc_voxels,
                        'channel': channel_name,
                        'mean_in': mean_in,
                        'mean_out': mean_out,
                        'norm_mean': mean_in / mean_out,
                        'total_in': total_in,
                        'total_out': total_out,
                        'norm_total': total_in / total_out
                    })

                    for object_id, obj_slice in enumerate(dense_objects,
                                                          start=1):
                        # not perfect because this is just a 3D bounding box,
                        # which will include pixels not in the region, but
                        # good enough for now!
                        obj_mean_in = np.mean(protein_box[obj_slice])

                        object_rows.append({
                            'sample': folder,
                            'replicate_id': replicate_count,
                            'nuc_id': nuc_id,
                            'object_id': object_id,
                            'voxels': np.sum(dense_obj_mask[obj_slice]),
                            'channel': channel_name,
                            'mean_in': obj_mean_in,
                            'mean_out': mean_out,
                            'norm_mean': obj_mean_in / mean_out
                        })

            graph_output_path = os.path.join(
                temp_output, folder + '_rep' + str(replicate_count) + '.png')
            methods.make_output_graphs(data.nuc_label,
                                       total_dense_object_mask, data,
                                       graph_output_path)

        excel_output = pd.DataFrame(enrichment_rows,
                                    columns=enrichment_columns)
        objects_output = pd.DataFrame(object_rows, columns=object_columns)

        excel_output.to_excel(os.path.join(temp_output,
                                           folder + '_enrichment.xlsx'),
                              index=False)
        objects_output.to_excel(os.path.join(temp_output,
                                             folder + '_objects.xlsx'),
                                index=False)