# Example 1
def crop_mrc(mrc_path,
             crop_path,
             x=0,
             y=0,
             z=0,
             dx=100,
             dy=100,
             dz=100,
             print_header_diff=False):
    """Extract a sub-volume of a 3D mrc file and write it to a new mrc file.

    The region written is mrc.data[x:x+dx, y:y+dy, z:z+dz]. Note that the
    axis order of mrc data is (x, y, z), different from that downstream
    tasks use, e.g. read_mrc_data and imod: (z, y, x).

    Arguments:
        mrc_path -- source mrc file path
        crop_path -- destination path of cropped mrc file

    Keyword Arguments:
        x {int} -- lowerbound x coordinate to crop (default: {0})
        y {int} -- lowerbound y coordinate to crop (default: {0})
        z {int} -- lowerbound z coordinate to crop (default: {0})
        dx {int} -- length of x to crop (default: {100})
        dy {int} -- length of y to crop (default: {100})
        dz {int} -- length of z to crop (default: {100})
        print_header_diff {bool} -- whether to print difference between the cropped and original (default: {False})
    """
    # mmap avoids loading the whole (possibly huge) volume into memory.
    with mrcfile.mmap(mrc_path, mode='r') as src, \
            mrcfile.new(crop_path) as dst:
        # set_data automatically syncs header info with data
        dst.set_data(src.data[x:x + dx, y:y + dy, z:z + dz])
        dst.voxel_size = src.voxel_size
    mrcfile.validate(crop_path)

    if not print_header_diff:
        return

    # Walk both headers in lockstep and report every field whose value changed.
    src_header = io_file.read_mrc_header(mrc_path)
    dst_header = io_file.read_mrc_header(crop_path)
    changed = []
    for (key_a, val_a), (key_b, val_b) in zip(src_header.items(),
                                              dst_header.items()):
        if key_a != key_b or val_a == val_b:
            continue
        if isinstance(val_a, dict):
            assert len(val_a) == len(val_b), \
                "Different dict size: {}, {}".format(len(val_a), len(val_b))
            diff_dict = {
                k: (val_a[k], val_b[k])
                for k in val_a if k in val_b and val_a[k] != val_b[k]
            }
            print("diff key: ", key_a, "\ndiff_dict:\n", diff_dict)
        else:
            print("diff key: ", key_a, "\n", val_a, "\n", val_b, "\n")
        changed.append((val_a, val_b))
    print("# diffs: ", len(changed))
# Example 2
def main():
    """DoG particle-picking driver.

    Reads the input tomogram path and output json path from getParams(),
    derives the DoG sigma1 from the voxel spacing stored in the mrc header,
    runs picking(), removes redundant peaks, and writes the peak locations
    to a json file that can be displayed with imod/3dmod.
    """
    path, output = getParams()

    # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC'][
        'nx'] / 10
    print("voxel_spacing_in_nm: %s" % voxel_spacing_in_nm)

    # In general, 7 is the optimal sigma1 value in nm according to the paper,
    # and sigma1 should be at least 2.
    sigma1 = 2
    try:
        sigma1 = max(int(7 / voxel_spacing_in_nm), sigma1)
    except (ZeroDivisionError, OverflowError):
        # With our test data the header reports a voxel spacing of 0, so the
        # division fails (plain zero -> ZeroDivisionError; a numpy float zero
        # yields inf and int(inf) -> OverflowError). Keep the minimum sigma1.
        pass

    print('sigma1=%d' % sigma1)
    # For particular tomogram, larger sigma1 value may have better results.
    # Use IMOD to display selected peaks and determine best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance
    # (in this tomogram, 7nm corresponds to 3.84 pixels)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=100)
    print("%d particles detected, containing redundant peaks" % len(result))
    # remove redundant peaks
    result = do_filter(pp=result, peak_dist_min=sigma1, op=None)
    print("peak number reduced to %d" % len(result))
    pprint(result[:5])

    # generate file for 3dmod: one {'peak': {'loc': [...]}} entry per peak
    json_data = [{
        'peak': {
            'loc': [coord.tolist() for coord in r['x']]
        }
    } for r in result]

    with open(output, 'w') as f:
        json.dump(json_data, f)
# Example 3
def main():
    """DoG particle-picking demo on a cellular tomogram, displayed via imod."""
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_cellular_tomogram.mrc'

    # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC'][
        'nx'] / 10
    # 7 is optimal sigma1 val in nm according to the paper and sigma1 should
    # at least be 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)
    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=100)
    print("%d particles detected, containing redundant peaks" % len(result))
    result = do_filter(pp=result, peak_dist_min=sigma1,
                       op=None)  # remove redundant peaks
    print("peak number reduced to %d" % len(result))
    pprint(result[:5])

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    # NOTE: the volume data itself is not needed here — display_map_with_lines
    # reads the file via map_file — so the previously unused read_mrc_data
    # call was removed to avoid loading the whole tomogram for nothing.
    json_data = [{
        'peak': {
            'loc': [coord.tolist() for coord in r['x']]
        }
    } for r in result]  # generate file for 3dmod
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    dj = json_data
    x = N.zeros((len(dj), 3))
    for i, d in enumerate(dj):
        x[i, :] = N.array(d['peak']['loc'])

    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
# Example 4
 def select_sigma(self):
     """Choose the DoG sigma1 from the tomogram's voxel spacing.

     In general, 7 is the optimal sigma1 value in nm according to the paper,
     and sigma1 should be at least 2. Returns the value as a float.
     """
     header = io_file.read_mrc_header(self.path)
     # xlen/nx is the voxel size (presumably in Angstrom — /10 converts to nm).
     spacing_nm = header['MRC']['xlen'] / header['MRC']['nx'] / 10
     return max(7 / spacing_nm, 2)
# Example 5
def main():
    """DoG picking demo on a single-particle tomogram.

    Picks peaks, optionally dumps peak subvolumes for the autoencoder, writes
    a json file of peak locations, and displays them with imod/3dmod.
    """
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_single_particle_tomogram.mrc'

    # Also, we can crop and only use part of the mrc image instead of binning
    # for tasks requiring higher resolution:
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = header['MRC']['xlen'] / header['MRC']['nx'] / 10
    # In general, 7 is optimal sigma1 val in nm according to the paper and
    # sigma1 should at least be 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For particular tomogram, larger sigma1 value may have better results. Use IMOD to display selected peaks and determine best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 is 5 rather than 3 for better performance(in this tomogram, 7nm corresponds to 3.84 pixels)
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = dict(nonoverlap_width=sigma1 * 20,
                        overlap_width=sigma1 * 10,
                        save_vg=False)
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=10,
                     pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        volume = io_file.read_mrc_data(path)
        subvols = peaks_to_subvolumes(im_vol_util.cub_img(volume)['vt'],
                                      result, 32)
        io_file.pickle_dump(subvols, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    '''
    #Optional: smooth original image
    a = io_file.read_mrc_data(path) 
    path =path[:-5]+'_smoothed'+path[-4:]
    temp = im_vol_util.cub_img(a)
    s1 = sigma1
    s2=sigma1*1.1
    vg = dog_smooth(temp['vt'], s1,s2)
    #vg = smooth(temp['vt'], s1)
    TIM.write_data(vg,path)
    '''
    # generate file for 3dmod
    json_data = [{
        'peak': {
            'loc': [coord.tolist() for coord in peak['x']]
        }
    } for peak in result]
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    dj = json_data
    x = N.zeros((len(dj), 3))
    for row, entry in enumerate(dj):
        x[row, :] = N.array(entry['peak']['loc'])

    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
# Example 6
                      for j in range(cluster_center_number)])
    for i in range(0, b.shape[0]):
        for j in range(0, b.shape[1]):
            for k in range(0, b.shape[2]):
                sum_f[label[i][j][k]][1] = sum_f[label[i][j][k]][1] + 1
                sum_f[label[i][j]
                      [k]][0] = sum_f[label[i][j][k]][0] + b[i][j][k]
    for i in range(cluster_center_number):
        assert sum_f[i][1] > 0
        result[i] = sum_f[i][0] / sum_f[i][1]
    return result


if __name__ == "__main__":
    path = './aitom_demo_single_particle_tomogram.mrc'  # file path
    # NOTE: the previously read mrc header was never used, so that call was
    # removed; only the volume data is needed for saliency detection.
    a = io_file.read_mrc_data(path)  # volume data
    assert a.shape[0] > 0
    a = a.astype(np.float32)
    print("file has been read, shape is", a.shape)
    # Time the whole saliency-detection pass (saves results when save_flag=True).
    start_time = time.time()
    saliency_detection(a=a,
                       gaussian_sigma=2.5,
                       gabor_sigma=14.0,
                       gabor_lambda=13.0,
                       cluster_center_number=10000,
                       multiprocessing_num=0,
                       pick_num=1000,
                       save_flag=True)
    end_time = time.time()
    print('saliency detection takes', end_time - start_time, 's')