Beispiel #1
0
def picking(path,
            s1,
            s2,
            t,
            find_maxima=True,
            partition_op=None,
            multiprocessing_process_num=0):
    '''
    Detect peaks in an MRC volume and keep those that reach a threshold.

    The file is read with io_file.read_mrc_data, converted with
    im_vol_util.cub_img, and the resulting 'vt' volume is passed to
    peak__partition (DoG peak detection; the result may still contain
    peaks caused by noise).

    The threshold is T = m + t * (M - m) / 20, where M is the 'val' of the
    first detected peak and m the 'val' of the last one.
    Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron
    Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment
    of Particle Picking. BMC Bioinformatics. 2016; 17: 405.

    parameters:
    path: path of the MRC file to read
    s1, s2: forwarded to peak__partition (presumably the two DoG sigmas)
    t: threshold level used in the formula above
    find_maxima, partition_op, multiprocessing_process_num: forwarded
        unchanged to peak__partition

    return:
    the leading slice of the peak list whose 'val' reaches T; an empty list
    when no peak was detected.
    '''
    a = io_file.read_mrc_data(path)
    print("file has been read")
    temp = im_vol_util.cub_img(a)
    a_vt = temp['vt']  # volume data

    # using DoG to detect all peaks, may contain peaks caused by noise
    peaks = peak__partition(
        a_vt,
        s1=s1,
        s2=s2,
        find_maxima=find_maxima,
        partition_op=partition_op,
        multiprocessing_process_num=multiprocessing_process_num)

    # Without any peak there is no M/m to derive a threshold from.
    if len(peaks) == 0:
        return []

    M = peaks[0]['val']  # val of the first (strongest) peak
    m = peaks[-1]['val']  # val of the last (weakest) peak
    T = m + t * (M - m) / 20

    # NOTE(review): the list is assumed to be ordered from strongest to
    # weakest peak. The ordering produced for find_maxima=False is not
    # visible here; if it is ascending, the comparison below is mirrored so
    # the same "keep the leading prefix" logic applies - confirm against
    # peak__partition.
    descending = M >= m

    # Default covers the case where no peak is rejected (e.g. t == 0), which
    # would otherwise leave the result undefined.
    cutoff = len(peaks)
    for i, peak in enumerate(peaks):
        val = peak['val']
        rejected = (val < T) if descending else (val > T)
        if rejected:
            cutoff = i
            break

    # Index `cutoff` is the first rejected peak, so the slice must end at
    # `cutoff` itself; ending at `cutoff - 1` would drop a valid peak and
    # wrap around to a negative index when `cutoff` is 0.
    res = peaks[:cutoff]

    print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" % (T, m, t, M))
    return res
Beispiel #2
0
def picking(path,
            s1,
            s2,
            t,
            find_maxima=True,
            partition_op=None,
            multiprocessing_process_num=0,
            pick_num=None):
    '''
    Detect peaks in an MRC volume with DoG, threshold them, remove redundant
    peaks and optionally cap how many are returned.

    parameters:
    path: file path
    s1: sigma1
    s2: sigma2
    t: threshold level
    find_maxima: peaks appear at the maximum/minimum
    multiprocessing_process_num: number of processes for multiprocessing
    partition_op: partition the volume for multithreading, a dict consisting
        of 'nonoverlap_width', 'overlap_width' and 'save_vg'
    pick_num: the max number of particles to pick out (None means no limit)

    # Take a two-dimensional image as an example: if the image size is
    # 210*150 (all in pixels), nonoverlap_width is 60 and overlap_width is 30,
    # it will be divided into 6 pieces for different threads to process.
    # The ranges of their X and Y are
    # (first line)  (0-90)*(0-90)   (60-150)*(0-90)   (120-210)*(0-90)
    # (second line) (0-90)*(60-150) (60-150)*(60-150) (120-210)*(60-150)

    In general, s2=1.1*s1; s1 and t depend on particle size and noise. In
    practice, s1 should be roughly equal to the particle radius (in pixels).
    In the related paper, the model achieves the highest comprehensive score
    when s1=7 and t=3.

    The threshold is T = m + t * (M - m) / 20, where M is the 'val' of the
    first detected peak and m the 'val' of the last one.
    Related paper: Pei L, Xu M, Frazier Z, Alber F. Simulating Cryo-Electron
    Tomograms of Crowded Mixtures of Macromolecular Complexes and Assessment
    of Particle Picking. BMC Bioinformatics. 2016; 17: 405.

    return:
    a list including all selected peaks (in descending order of value) after
    redundant peaks were removed by do_filter; empty when nothing was
    detected or nothing reaches the threshold. Each element looks like:
    {'val': 281.4873046875, 'x': [1178, 1280, 0], 'uuid': '6ad66107-088c-471e-b65f-0b3b2fdc35b0'}
    'val' is the score of the peak when picking; a peak is selected only if
    its score reaches the threshold.
    'x' is the center of the peak in the tomogram.
    'uuid' is a unique id for each peak.
    '''
    a = io_file.read_mrc_data(path)
    print("file has been read")
    temp = im_vol_util.cub_img(a)
    a_vt = temp['vt']  # volume data

    # using DoG to detect all peaks, may contain peaks caused by noise
    peaks = peak__partition(
        a_vt,
        s1=s1,
        s2=s2,
        find_maxima=find_maxima,
        partition_op=partition_op,
        multiprocessing_process_num=multiprocessing_process_num)

    # Without any peak there is no M/m to derive a threshold from.
    if len(peaks) == 0:
        return []

    # calculate threshold T and delete peaks that do not reach it
    M = peaks[0]['val']  # val of the first (strongest) peak
    m = peaks[-1]['val']  # val of the last (weakest) peak
    T = m + t * (M - m) / 20

    # bisect needs ascending keys. Multiplying by the boolean `find_maxima`
    # turns every key into 0 when it is False, so an explicit +1/-1 sign is
    # used instead.
    # NOTE(review): the sign is taken from the actual order of the list
    # because the ordering peak__partition returns for find_maxima=False is
    # not visible here - confirm.
    sign = 1 if M >= m else -1
    sort_keys = [-sign * peak['val'] for peak in peaks]

    # bisect (bisect_right) returns the number of keys <= the threshold key,
    # i.e. exactly how many leading peaks reach T. Subtracting 1 from it
    # would drop a valid peak and wrap to a negative index when it is 0.
    keep = bisect(sort_keys, -sign * T)
    res = peaks[:keep]
    print("%d particles detected, containing redundant peaks" % len(res))

    # remove redundant peaks; skipped for an empty selection.
    # NOTE(review): assumes do_filter returns a sliceable sequence of peak
    # dicts - confirm against its definition.
    result = do_filter(pp=res, peak_dist_min=s1, op=None) if res else res
    print("peak number reduced to %d" % len(result))

    # The cap applies to the filtered peaks, which are what is returned.
    if pick_num is not None and pick_num < len(result):
        result = result[:pick_num]

    print("T=m+t*(M-m)/20 \nT=%f m=%f t=%f M=%f" % (T, m, t, M))
    return result