Example #1
    def dump_subvol(self, picking_result):
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        subvols_loc = os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle")
        a = io_file.read_mrc_data(self.path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], picking_result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)
Example #2
def load_dict(path):
    if not os.path.isfile(path):
        d = {}
        AIF.pickle_dump(d, path)
    else:
        d = AIF.pickle_load(path)
    return d
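load_dict is the small checkpoint helper the later examples lean on: the first call creates an empty dict on disk, and later calls resume from whatever was last dumped. A minimal usage sketch, assuming AIF is aitom's pickle I/O module (import aitom.io.file as AIF) and the path is writable:

    d = load_dict('./tmp/djs.pickle')   # first call: writes {} to disk and returns it
    d[-1] = dj_init                     # hypothetical initial payload, as in Examples #6 and #9
    AIF.pickle_dump(d, './tmp/djs.pickle')
    d = load_dict('./tmp/djs.pickle')   # later call: resumes from the saved dict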
Example #3
def combine_subtom(out_dir, pickle_path):
    subvols_loc = os.path.join(out_dir, 'selected_demo_single_particle_subvolumes.pickle')
    pickle_data = AIF.pickle_load(pickle_path)
    d = AIF.pickle_load(subvols_loc)
    subvols = []
    for v in d['vs'].values():
        if v['v'] is not None:
            subvols.append(v['v'])

    # two copies of the 1KP8 reference set plus the first 100 picked subvolumes
    subtom = pickle_data['1KP8_data'] + pickle_data['1KP8_data'] + subvols[:100]
    print('Total subtomograms:', len(subtom))
    subvols_loc = os.path.join(out_dir, 'subvolumes.pickle')
    d = {}
    d['v_siz'] = np.array([32, 32, 32])
    d['vs'] = {}
    for i in range(len(subtom)):
        uuid_i = str(uuid.uuid4())
        d['vs'][uuid_i] = {}
        d['vs'][uuid_i]['center'] = None
        d['vs'][uuid_i]['id'] = uuid_i
        d['vs'][uuid_i]['v'] = subtom[i]
        d['vs'][uuid_i]['label'] = i // 100  # one label per block of 100

    AIF.pickle_dump(d, subvols_loc)
    print("Save subvolumes .pickle file to:", subvols_loc)
Example #4
def particle_picking(mrc_header):
    # voxel spacing: xlen is in angstroms, so divide by 10 to get nm (see Example #8)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10

    # In general, 7 nm is the optimal sigma1 value according to the paper,
    # and sigma1 should be at least 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For a particular tomogram, a larger sigma1 value may give better results.
    # Use IMOD to display the selected peaks and determine the best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 = 5 performs better than 3
    # (in that tomogram, 7 nm corresponds to 3.84 voxels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = {'nonoverlap_width': sigma1 * 20, 'overlap_width': sigma1 * 10, 'save_vg': False}
    # 'path' is assumed to be a module-level variable holding the .mrc file path (see Example #8)
    result = picking(path, s1=sigma1, s2=sigma1 * 1.1, t=3, find_maxima=False, partition_op=partition_op,
                     multiprocessing_process_num=10, pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes

        a = io_file.read_mrc_data(path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)
Example #5
    def select(self, remove_particles, pick_num):
        d = io_file.pickle_load(os.path.join(self.dump_path, "demo_single_particle_subvolumes.pickle"))
        subvols_loc = os.path.join(self.dump_path, "selected_demo_single_particle_subvolumes.pickle")
        particles_num = pick_num
        result = {}
        result['v_siz'] = d['v_siz']
        result['vs'] = {}
        remove_particles = np.array(remove_particles)
        # d = {v_siz:(32,32,32), vs:{uuid0:{center, v, id}, uuid1:{center, v, id} ... }}

        for i in range(len(self.centers)):
            if i in remove_particles:
                continue
            uuid_i = self.uuids[i]
            result['vs'][uuid_i] = d['vs'][uuid_i]
            if len(result['vs']) >= particles_num:
                break
        assert len(result['vs']) == particles_num
        # subvols_loc = './tmp/picking/selected_demo_single_particle_subvolumes.pickle'
        AIF.pickle_dump(result, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc) 
Example #6
def average(dj_init=None,
            img_db=None,
            djs_file=None,
            avgs_file=None,
            pcas_file=None,
            op=None):
    djs = load_dict(op['data_checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])

    dj = djs[-1]
    for pass_i in range(op['option']['pass_num']):
        print('pass_i', pass_i)
        if pass_i in djs:
            dj = djs[pass_i]
            continue

        # make a copy of the previous pass, for an update
        dj = copy.deepcopy(dj)

        c = str(uuid.uuid4())
        avg_t = vol_avg(dj=dj, op=op['average'], img_db=img_db)
        avgs[c] = avg_t
        avgs[c]['pass_i'] = pass_i
        avgs[c]['id'] = c
        AIF.pickle_dump(avgs, op['average']['checkpoint'])
        print('averaging done')

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']
        print('re-align done')

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])
Example #7
File: faml.py Project: xut006/aitom
def EM(img_data,
       K,
       iteration,
       path,
       snapshot_interval=5,
       reg=False,
       use_voronoi=True):
    """
    The main estimation-maximization algorithm
    """
    np.seterr(all='ignore')
    X = get_image_db(img_data['db_path'])
    dj = img_data['dj']

    N = len(dj)
    n_x, n_y, n_z = X[dj[0]['v']].shape

    theta = dict()
    theta['N'] = N
    theta['J'] = n_x * n_y * n_z
    theta['n'] = n_x
    theta['K'] = K
    # Proportional to the radius of the image
    theta['xi'] = theta['n']
    # We need to initialize this later
    theta['A'] = np.zeros([K, n_x, n_y, n_z], dtype=np.complex128)
    theta['alpha'] = np.ones([K], dtype=np.float_) / K
    theta['trans_list'] = None
    theta['predictions'] = np.zeros([N])

    # Print relevant information
    print("Running model-based alignment: N=%d, K=%d, dimensions=(%d,%d,%d)" %
          (N, K, n_x, n_y, n_z))
    if reg:
        print("With regularization")
    else:
        print("Without regularization")
    if use_voronoi:
        print("With Voronoi weights")
    else:
        print("Without Voronoi weights")

    # Regularization
    reg_step = (float(N) / K**2) / 2
    theta['theta_reg'] = 5 * reg_step if reg else 0

    # Sample K random data points from the set to initialize A
    indices = np.random.permutation(N)
    num_models = [0 for _ in range(K)]
    k = 0
    for i in range(N):
        theta['A'][k] += X[dj[indices[i]]['v']] * X[dj[indices[i]]['m']]
        num_models[k] += 1
        k = (k + 1) % K

    for k in range(K):
        theta['A'][k] /= num_models[k]

    # Get a random A_k and a random X_i and calculate sum_j to get sigma_sq
    k = np.random.randint(K)
    i = np.random.randint(N)
    sum_j = np.sum(
        np.square(np.absolute(theta['A'][k] - X[dj[i]['v']]) * X[dj[i]['m']]))
    theta['sigma_sq'] = sum_j / theta['J']
    print("Sigma_sq initialized to %d" % theta['sigma_sq'])

    checkpoint_dir = os.path.join(path, 'checkpoints')
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)

    interval = snapshot_interval
    for i in range(iteration):
        checkpoint_file = os.path.join(checkpoint_dir, '%08d.pickle' % i)
        if os.path.exists(checkpoint_file):
            checkpoint_data = AIF.pickle_load(checkpoint_file)
            theta = checkpoint_data['theta']
            continue

        if i % interval == 0:
            output_images(theta, i, path=path)

        print("Running iteration %d" % (i + 1))
        # Update alpha before updating A
        compute_trans_list(theta=theta,
                           img_data=img_data,
                           use_voronoi=use_voronoi)

        alpha = update_alpha(img_data=img_data,
                             theta=theta,
                             use_voronoi=use_voronoi)
        print("Alpha updated! Alpha = ", end=' ')
        print(alpha.tolist())

        sigma_sq = update_sigma(img_data=img_data,
                                theta=theta,
                                reg=reg,
                                use_voronoi=use_voronoi)
        print("Sigma updated! Sigma^2 = ", end=' ')
        print(sigma_sq)

        xi = update_xi(img_data=img_data, theta=theta, use_voronoi=use_voronoi)
        print("Xi updated! Xi = ", end=' ')
        print(xi)

        A = update_a(img_data=img_data,
                     theta=theta,
                     alpha=alpha,
                     reg=reg,
                     use_voronoi=use_voronoi)
        print("A updated! Average intensity of A = ", end=' ')
        print(np.average(A, (1, 2, 3)))

        theta['alpha'] = alpha
        theta['sigma_sq'] = sigma_sq
        theta['xi'] = xi
        theta['A'] = A
        # Since we changed the models A, the list of optimal transforms
        # needs to be re-calculated
        theta['trans_list'] = None
        theta['pred'] = None

        # Decrease the regularization coefficient
        if reg and theta['theta_reg'] > 0:
            theta['theta_reg'] -= reg_step
            theta['theta_reg'] = max(0, theta['theta_reg'])

        if os.path.exists(checkpoint_file):
            raise Exception("Checkpoint file already exists!")
        AIF.pickle_dump({'theta': theta}, checkpoint_file)

    print_prediction_results(theta, img_data)
    output_images(theta, iteration, path=path)
    print("Prediction from model: ", end=' ')
    print(theta['predictions'])
    return theta
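EM() writes one pickle per iteration under path/checkpoints/, so a run can be inspected offline or resumed simply by calling it again with the same path. A minimal sketch, assuming AIF is aitom's pickle I/O module and the eight-digit file names produced above:

    checkpoint_data = AIF.pickle_load(os.path.join(path, 'checkpoints', '00000004.pickle'))
    theta = checkpoint_data['theta']
    print(theta['K'], theta['sigma_sq'])  # model count and noise estimate at that iteration
    print(theta['alpha'])                 # mixing proportions
    # theta['A'] holds the K complex model volumes; on a re-run, EM() skips
    # every iteration whose checkpoint file already exists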
Example #8
def main():
    # Download from: https://cmu.box.com/s/9hn3qqtqmivauus3kgtasg5uzlj53wxp
    path = '/ldap_shared/home/v_zhenxi_zhu/data/aitom_demo_single_particle_tomogram.mrc'

    # Also, we can crop and only use part of the mrc image instead of binning for tasks requiring higher resolution
    # crop_path = 'cropped.mrc'
    # crop_mrc(path, crop_path)

    mrc_header = io_file.read_mrc_header(path)
    voxel_spacing_in_nm = mrc_header['MRC']['xlen'] / mrc_header['MRC']['nx'] / 10
    # In general, 7 nm is the optimal sigma1 value according to the paper,
    # and sigma1 should be at least 2.
    sigma1 = max(int(7 / voxel_spacing_in_nm), 2)
    print('sigma1=%d' % sigma1)
    # For a particular tomogram, a larger sigma1 value may give better results.
    # Use IMOD to display the selected peaks and determine the best sigma1.
    # For 'aitom_demo_cellular_tomogram.mrc', sigma1 = 5 performs better than 3
    # (in that tomogram, 7 nm corresponds to 3.84 voxels).
    # print(mrc_header['MRC']['xlen'], mrc_header['MRC']['nx'], voxel_spacing_in_nm, sigma1)

    partition_op = {
        'nonoverlap_width': sigma1 * 20,
        'overlap_width': sigma1 * 10,
        'save_vg': False
    }
    result = picking(path,
                     s1=sigma1,
                     s2=sigma1 * 1.1,
                     t=3,
                     find_maxima=False,
                     partition_op=partition_op,
                     multiprocessing_process_num=10,
                     pick_num=1000)
    print("DoG done, %d particles picked" % len(result))
    pprint(result[:5])

    # (Optional) Save subvolumes of peaks for autoencoder input
    dump_subvols = True
    if dump_subvols:  # use later for autoencoder
        subvols_loc = "demo_single_particle_subvolumes.pickle"
        from aitom.classify.deep.unsupervised.autoencoder.autoencoder_util import peaks_to_subvolumes
        a = io_file.read_mrc_data(path)
        d = peaks_to_subvolumes(im_vol_util.cub_img(a)['vt'], result, 32)
        io_file.pickle_dump(d, subvols_loc)
        print("Save subvolumes .pickle file to:", subvols_loc)

    # Display selected peaks using imod/3dmod (http://bio3d.colorado.edu/imod/)
    '''
    # Optional: smooth the original image
    a = io_file.read_mrc_data(path)
    path = path[:-4] + '_smoothed' + path[-4:]
    temp = im_vol_util.cub_img(a)
    s1 = sigma1
    s2 = sigma1 * 1.1
    vg = dog_smooth(temp['vt'], s1, s2)
    # vg = smooth(temp['vt'], s1)
    TIM.write_data(vg, path)
    '''
    json_data = []  # generate file for 3dmod
    for i in range(len(result)):
        loc_np = result[i]['x']
        loc = []
        for j in range(len(loc_np)):
            loc.append(loc_np[j].tolist())
        json_data.append({'peak': {'loc': loc}})
    with open('data_json_file.json', 'w') as f:
        json.dump(json_data, f)

    dj = json_data
    x = N.zeros((len(dj), 3))
    for i, d in enumerate(dj):
        x[i, :] = N.array(d['peak']['loc'])

    l = generate_lines(x_full=x, rad=sigma1)
    display_map_with_lines(l=l, map_file=path)
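The JSON written for 3dmod is just a list of peak locations; reading it back into an (n, 3) array mirrors the loop above. A small hedged sketch:

    with open('data_json_file.json') as f:
        dj = json.load(f)
    x = N.array([d['peak']['loc'] for d in dj])  # shape (len(dj), 3)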
Example #9
def classify(dj_init=None,
             img_db=None,
             djs_file=None,
             avgs_file=None,
             pcas_file=None,
             op=None):
    """
    classify

    parameters:
        dj_init: a list of dicts, where each element looks like:
            {'subtomogram': v_id,
             'mask': mask_id,
             'angle': ang_t,
             'loc': loc_t,
             'model_id': model_id}
        img_db: a dict to find subtomogram data by its uuid (img_db[uuid] is a 3D np array)

    result (pickle file):
        average results of each class
    """
    djs = load_dict(op['data_checkpoint'])
    pcas = load_dict(op['dim_reduction']['pca']['checkpoint'])
    clus = load_dict(op['clustering']['checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])

    dj = djs[-1]
    for pass_i in range(op['option']['pass_num']):
        if pass_i in djs:
            dj = djs[pass_i]
            continue

        # make a copy of the previous pass, for an update
        dj = copy.deepcopy(dj)

        if pass_i not in pcas:
            red = covariance_filtered_pca(dj=dj,
                                          img_db=img_db,
                                          templates=avgs,
                                          pca_op=op['dim_reduction']['pca'])
            # print(type(red))
            pcas[pass_i] = red
            AIF.pickle_dump(pcas, op['dim_reduction']['pca']['checkpoint'])
        else:
            red = copy.deepcopy(pcas[pass_i])

        if pass_i not in clus:
            lbl = kmeans_clustering(x=red, k=op['clustering']['kmeans_k'])
            clus[pass_i] = lbl
            AIF.pickle_dump(clus, op['clustering']['checkpoint'])
        else:
            lbl = clus[pass_i]
        # print('lbl', lbl)

        for d in dj:
            d['cluster'] = lbl[d['subtomogram']]

        # calculate cluster averages
        new_avgs = set()
        for c in set([lbl[_] for _ in lbl]):
            # print('c', c)
            if c in avgs:
                continue

            avg_t = vol_avg(dj=[_ for _ in dj if _['cluster'] == c],
                            op=op['average'],
                            img_db=img_db)
            if avg_t is None:
                continue

            avgs[c] = avg_t
            avgs[c]['pass_i'] = pass_i
            avgs[c]['id'] = c

            new_avgs.add(c)

        if len(new_avgs) > 0:
            AIF.pickle_dump(avgs, op['average']['checkpoint'])

        # print('avgs')
        # for key in avgs:
        #     print('\n',pass_i,key)
        #     for key2 in avgs[key]:
        #         print(key2)
        #     print(avgs[key]['pass_i'],avgs[key]['id'])

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])
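classify() pulls all of its configuration from the nested op dict. A hedged sketch of one that covers every key the function itself dereferences, modeled on the average() setup in Example #12; note that covariance_filtered_pca and vol_avg may expect further keys under their sub-dicts:

    out_dir = './tmp/out'  # hypothetical output directory
    op = {
        'option': {'pass_num': 20},
        'data_checkpoint': os.path.join(out_dir, 'djs.pickle'),
        'dim_reduction': {'pca': {'checkpoint': os.path.join(out_dir, 'pcas.pickle')}},
        'clustering': {'kmeans_k': 4,
                       'checkpoint': os.path.join(out_dir, 'clus.pickle')},
        'average': {'mask_count_threshold': 2,
                    'checkpoint': os.path.join(out_dir, 'avgs.pickle')},
    }
    classify(dj_init=dj, img_db=img_db, op=op)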
Example #10
def encoder_simple_conv_test(d, pose, img_org_file, out_dir, clus_num):
    if pose:
        assert img_org_file is not None

        tom0 = auto.read_mrc_numpy_vol(img_org_file)
        tom = AFG.smooth(tom0, 2.0)
        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]

        x_train_no_pose = [N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys]
        x_train_no_pose = N.array(x_train_no_pose)
        x_center = [d['vs'][_]['center'] for _ in x_keys]

        x_train = []
        default_val = tom.mean()
        x_train_no_pose -= x_train_no_pose.max()
        x_train_no_pose = N.abs(x_train_no_pose)

        print('pose normalizing')
        for i in range(len(x_train_no_pose)):
            center = x_center[i]
            v = x_train_no_pose[i][:, :, :, 0]
            c = auto.center_mass(v)

            # calculate principal directions
            rm = auto.pca(v=v, c=c)['v']
            mid_co = (N.array(v.shape) - 1) / 2.0
            loc_r__pn = rm.T.dot(mid_co - c)

            # pose normalize so that the major axis is along x-axis
            vr = auto.rotate_retrieve(v,
                                      tom=tom,
                                      rm=rm,
                                      center=center,
                                      loc_r=loc_r__pn,
                                      default_val=default_val)
            x_train.append(vr)

        x_train = N.array(x_train)
        x_train = N.expand_dims(x_train, axis=4)

        print('pose normalization finished')

    else:
        x_keys = [_ for _ in d['vs'] if d['vs'][_]['v'] is not None]

        x_train = [N.expand_dims(d['vs'][_]['v'], -1) for _ in x_keys]
        x_train = N.array(x_train)

    if False:
        # Warning: if you normalize here, you also need to normalize when
        # decoding, so it is better not to normalize here. Use batch
        # normalization in the network instead.
        if True:
            x_train -= x_train.mean()
            x_train /= x_train.std()
        else:
            x_train -= x_train.min()
            x_train /= x_train.max()
            x_train -= 0.5
            x_train *= 2

    # print('x_train.shape', x_train.shape)

    model_dir = op_join(out_dir, 'model')
    if not os.path.isdir(model_dir):
        os.makedirs(model_dir)

    model_autoencoder_checkpoint_file = op_join(
        model_dir, 'model-autoencoder--weights--best.h5')
    model_autoencoder_file = op_join(model_dir, 'model-autoencoder.h5')
    model_encoder_file = op_join(model_dir, 'model-encoder.h5')
    model_decoder_file = op_join(model_dir, 'model-decoder.h5')

    if not os.path.isfile(model_autoencoder_file):
        enc = encoder_simple_conv(img_shape=d['v_siz'])
        autoencoder = enc['autoencoder']

        autoencoder_p = autoencoder

        from keras.optimizers import Adam
        # choose a proper lr to control convergence speed and val_loss
        adam = Adam(lr=0.001, beta_1=0.9, decay=0.001 / 500)
        # sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
        autoencoder_p.compile(optimizer=adam, loss='mean_squared_error')

        if os.path.isfile(model_autoencoder_checkpoint_file):
            print('loading previous best weights',
                  model_autoencoder_checkpoint_file)
            autoencoder_p.load_weights(model_autoencoder_checkpoint_file)

        from keras.callbacks import EarlyStopping, ModelCheckpoint
        earlyStopping = EarlyStopping(monitor='val_loss',
                                      patience=20,
                                      verbose=0,
                                      mode='auto')
        checkpoint = ModelCheckpoint(model_autoencoder_checkpoint_file,
                                     monitor='val_loss',
                                     verbose=1,
                                     save_best_only=True,
                                     mode='auto')
        # use a large batch size when batch normalization is used
        autoencoder_p.fit(x_train,
                          x_train,
                          nb_epoch=100,  # legacy Keras 1 argument name; use epochs= on Keras >= 2
                          batch_size=128,
                          shuffle=True,
                          validation_split=0.1,
                          callbacks=[checkpoint, earlyStopping])

        # we use the best weights for subsequent analysis
        autoencoder_p.load_weights(model_autoencoder_checkpoint_file)

        enc['autoencoder'].save(model_autoencoder_file)
        enc['encoder'].save(model_encoder_file)
        enc['decoder'].save(model_decoder_file)
    else:
        import keras.models as KM

        enc = dict()
        enc['autoencoder'] = KM.load_model(model_autoencoder_file)
        enc['encoder'] = KM.load_model(model_encoder_file)
        enc['decoder'] = KM.load_model(model_decoder_file)

    x_enc = enc['encoder'].predict(x_train)

    # use kmeans to separate x_enc into a specific number of clusters,
    # then decode the cluster centers and patch them back into the image
    # (mayavi might be an option for visualization)

    from sklearn.cluster import KMeans

    # note: KMeans' n_jobs argument was removed in scikit-learn 1.0
    kmeans = KMeans(n_clusters=clus_num, n_jobs=-1, n_init=100).fit(x_enc)
    x_km_cent = N.array(
        [_.reshape(x_enc[0].shape) for _ in kmeans.cluster_centers_])
    x_km_cent_pred = enc['decoder'].predict(x_km_cent)

    # save cluster info and cluster centers
    clus_center_dir = op_join(out_dir, 'clus-center')
    if not os.path.isdir(clus_center_dir): os.makedirs(clus_center_dir)

    kmeans_clus = defaultdict(list)
    for i, l in enumerate(kmeans.labels_):
        kmeans_clus[l].append(x_keys[i])
    AIF.pickle_dump(kmeans_clus, op_join(clus_center_dir, 'kmeans.pickle'))

    ccents = {}
    for i in range(len(x_km_cent_pred)):
        ccents[i] = x_km_cent_pred[i].reshape(d['v_siz'])
    AIF.pickle_dump(ccents, op_join(clus_center_dir, 'ccents.pickle'))
    AIF.pickle_dump(x_km_cent, op_join(clus_center_dir, 'ccents_d.pickle'))
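The pickles written above can be read back to inspect cluster membership and view the decoded centers. A sketch, assuming AIF is aitom's pickle I/O module and the save_png/cub_img helpers used in Example #12 are imported:

    ccents = AIF.pickle_load(op_join(clus_center_dir, 'ccents.pickle'))
    kmeans_clus = AIF.pickle_load(op_join(clus_center_dir, 'kmeans.pickle'))
    for i, v in ccents.items():
        print('cluster', i, 'has', len(kmeans_clus[i]), 'subvolumes')
        save_png(cub_img(v)['im'], op_join(clus_center_dir, '%d.png' % i))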
Example #11
    else:
        class_num = 1
    for model_id in range(class_num):
        for v_i in range(v_num):
            ang_t = [_ for _ in N.random.random(3) * (N.pi * 2)]
            # loc_t = TGA.random_translation(size=[v_dim_siz]*3, proportion=0.2)
            loc_t = [0.0, 0.0, 0.0]
            v_id = str(uuid.uuid4())
            dj.append({
                'subtomogram': v_id,
                'mask': mask_id,
                'angle': ang_t,
                'loc': loc_t,
                'model_id': model_id
            })
    AIF.pickle_dump(dj, dj_file)

    sim_op = {
        'model': {
            'missing_wedge_angle': wedge_angle,
            'titlt_angle_step': 1,
            'SNR': 1000,
            'band_pass_filter': False,
            'use_proj_mask': False
        },
        'ctf': {
            'pix_size': 1.0,
            'Dz': -5.0,
            'voltage': 300,
            'Cs': 2.0,
            'sigma': 0.4
Example #12
def single_average(subtom):
    print('subtom_type=', type(subtom))
    assert len(subtom) == 100
    print('subtom[0]_type=', type(subtom[0]))
    average = True

    test_dir = './tmp/cls-test/' + str(uuid.uuid4())  # test dir
    if os.path.exists(test_dir):
        shutil.rmtree(test_dir)
    os.makedirs(test_dir)

    dj_file = os.path.join(test_dir, 'data.pickle')
    img_db_file = os.path.join(test_dir, 'image.db')

    v_num = 100  # the number of each class
    v_dim_siz = 32
    wedge_angle = 30
    mask_id = str(uuid.uuid4())
    dj = []
    class_num = 1

    for model_id in range(class_num):
        for v_i in range(v_num):
            ang_t = [_ for _ in N.random.random(3) * (N.pi * 2)]
            # loc_t = TGA.random_translation(size=[v_dim_siz]*3, proportion=0.2)
            loc_t = [0.0, 0.0, 0.0]
            v_id = str(uuid.uuid4())
            dj.append({
                'subtomogram': v_id,
                'mask': mask_id,
                'angle': ang_t,
                'loc': loc_t,
                'model_id': model_id
            })
    AIF.pickle_dump(dj, dj_file)

    sim_op = {
        'model': {
            'missing_wedge_angle': wedge_angle,
            'titlt_angle_step': 1,
            'SNR': 1000,
            'band_pass_filter': False,
            'use_proj_mask': False
        },
        'ctf': {
            'pix_size': 1.0,
            'Dz': -5.0,
            'voltage': 300,
            'Cs': 2.0,
            'sigma': 0.4
        }
    }

    img_db = TIDL.LSM(img_db_file)
    for index, d in enumerate(dj):
        # N.float was removed in newer numpy; plain float keeps float64
        img_db[d['subtomogram']] = subtom[index].astype(float)
        # print(img_db[d['subtomogram']].shape)

    import aitom.image.vol.wedge.util as TIVWU
    img_db[mask_id] = TIVWU.wedge_mask(size=[v_dim_siz] * 3, ang1=wedge_angle)
    print('file generation complete')

    out_dir = os.path.join(test_dir, 'out')
    if os.path.exists(out_dir): shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    from aitom.classify.align.simple_iterative.classify import randomize_orientation
    from aitom.classify.align.simple_iterative.classify import export_avgs
    if average:
        import aitom.average.align.simple_iterative.average as avg
        op = {}
        op['option'] = {'pass_num': 20}  # the number of iterations
        op['data_checkpoint'] = os.path.join(out_dir, 'djs.pickle')
        op['average'] = {}
        op['average']['mask_count_threshold'] = 2
        op['average']['checkpoint'] = os.path.join(out_dir, 'avgs.pickle')

        dj = AIF.pickle_load(os.path.join(test_dir, 'data.pickle'))
        img_db = TIDL.LSM(os.path.join(test_dir, 'image.db'), readonly=True)

        randomize_orientation(dj)
        avg.average(dj_init=dj, img_db=img_db, op=op)

        export_avgs(AIF.pickle_load(os.path.join(out_dir, 'avgs.pickle')),
                    out_dir=os.path.join(out_dir, 'avgs-export'))
        print('averaging done')

    # visualization
    # test_dir = './tmp/cls-test/'+str(uuid.uuid4())  # test dir
    avgs = pickle_load(os.path.join(test_dir, 'out/avgs.pickle'))
    out_dir = os.path.join(test_dir, 'image')
    if os.path.exists(out_dir): shutil.rmtree(out_dir)
    os.makedirs(out_dir)
    for i in avgs.keys():
        v = avgs[i]['v']
        file_name = str(avgs[i]['pass_i']) + '_' + str(i) + '.png'
        save_png(cub_img(v)['im'], os.path.join(out_dir, file_name))
    print('images saved in', out_dir)
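single_average() expects a plain list of exactly 100 3D arrays. A hedged sketch feeding it the picked subvolumes saved in Examples #1/#5 (AIF as above):

    d = AIF.pickle_load('selected_demo_single_particle_subvolumes.pickle')
    subtom = [v['v'] for v in d['vs'].values() if v['v'] is not None]
    single_average(subtom[:100])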
Example #13
def classify(dj_init=None, img_db=None, djs_file=None, avgs_file=None, pcas_file=None, op=None):
    djs = load_dict(op['data_checkpoint'])
    pcas = load_dict(op['dim_reduction']['pca']['checkpoint'])
    clus = load_dict(op['clustering']['checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])

    dj = djs[-1]
    for pass_i in range(op['option']['pass_num']):
        if pass_i in djs:
            dj = djs[pass_i]
            continue

        dj = copy.deepcopy(dj)  # make a copy of the previous pass, for an update

        if pass_i not in pcas:
            red = covariance_filtered_pca(dj=dj, img_db=img_db, templates=avgs, pca_op=op['dim_reduction']['pca'])
            # print(type(red))
            pcas[pass_i] = red
            AIF.pickle_dump(pcas, op['dim_reduction']['pca']['checkpoint'])
        else:
            red = copy.deepcopy(pcas[pass_i])

        if pass_i not in clus:
            lbl = kmeans_clustering(x=red, k=op['clustering']['kmeans_k'])
            clus[pass_i] = lbl
            AIF.pickle_dump(clus, op['clustering']['checkpoint'])
        else:
            lbl = clus[pass_i]
        # print('lbl', lbl)

        for d in dj:
            d['cluster'] = lbl[d['subtomogram']]

        # calculate cluster averages
        new_avgs = set()
        for c in set([lbl[_] for _ in lbl]):
            # print('c', c)
            if c in avgs:
                continue

            avg_t = vol_avg(dj=[_ for _ in dj if _['cluster'] == c], op=op['average'], img_db=img_db)
            if avg_t is None:
                continue

            avgs[c] = avg_t
            avgs[c]['pass_i'] = pass_i
            avgs[c]['id'] = c

            new_avgs.add(c)

        if len(new_avgs) > 0:
            AIF.pickle_dump(avgs, op['average']['checkpoint'])
        '''
        print('avgs')
        for key in avgs:
            print('\n',pass_i,key)
            for key2 in avgs[key]:
                print(key2)
            print(avgs[key]['pass_i'],avgs[key]['id'])
        '''
        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])
Example #14
def average(dj_init=None,
            img_db=None,
            djs_file=None,
            avgs_file=None,
            pcas_file=None,
            op=None):
    """
    parameters:
    dj_init:
        a list of dicts, where each element looks like:
        {'subtomogram':v_id,
         'mask':mask_id,
          'angle':ang_t,
          'loc':loc_t,
          'model_id':model_id}
    img_db:
        a dict to find subtomogram data by its uuid (img_db[uuid] is a 3D np array)
        it contains only one class

    result(pickle file):
        average result, the same shape as original subtomogram
    """
    djs = load_dict(op['data_checkpoint'])
    avgs = load_dict(op['average']['checkpoint'])

    if -1 not in djs:
        # store initial data
        assert len(djs) == 0
        djs[-1] = dj_init
        AIF.pickle_dump(djs, op['data_checkpoint'])

    dj = djs[-1]
    for pass_i in range(op['option']['pass_num']):
        print('pass_i', pass_i)
        if pass_i in djs:
            dj = djs[pass_i]
            continue

        # make a copy of the previous pass, for an update
        dj = copy.deepcopy(dj)

        c = str(uuid.uuid4())
        avg_t = vol_avg(dj=dj, op=op['average'], img_db=img_db)
        avgs[c] = avg_t
        avgs[c]['pass_i'] = pass_i
        avgs[c]['id'] = c
        AIF.pickle_dump(avgs, op['average']['checkpoint'])
        print('averaging done')

        # re-align subtomograms
        al = align_all_pairs(avgs=avgs, dj=dj, img_db=img_db)
        a = align_all_pairs__select_best(al)
        for d in dj:
            i = d['subtomogram']
            d['loc'] = a[i]['loc']
            d['angle'] = a[i]['angle']
            d['score'] = a[i]['score']
            d['template_id'] = a[i]['template_id']
        print('re-align done')

        djs[pass_i] = dj
        AIF.pickle_dump(djs, op['data_checkpoint'])