Example #1
# Imports reconstructed so the example is self-contained; the aitom module
# paths for IF (MRC file I/O) and GR (volume rotation) are assumptions based
# on the AITom code base.
import json
import os
import random
import sys

import numpy as N
import sklearn.cluster as SC

import aitom.io.file as IF
import aitom.geometry.rotate as GR


def process(op):
    """Cluster subtomograms: load each volume, rotate it into a common frame,
    optionally reduce dimensionality with PCA, then run k-means and write the
    cluster labels, averages and statistics to disk."""
    with open(op['input data json file']) as f:
        dj = json.load(f)
    if 'test' in op:
        if ('sample_num' in op['test']) and (op['test']['sample_num'] > 0) and (len(dj) > op['test']['sample_num']):
            print('testing the procedure using a subsample of %d subtomograms' % op['test']['sample_num'])
            dj = random.sample(dj, op['test']['sample_num'])
    mat = None  # row i will hold the flattened, rotated subtomogram i
    for (i, d) in enumerate(dj):
        print('\rloading', i, '            ', end=' ')
        sys.stdout.flush()
        v = IF.read_mrc_vol(d['subtomogram'])
        if op['mode'] == 'pose':
            # align using a precomputed pose (rotation matrix rm, center c)
            vr = GR.rotate_pad_mean(v, rm=N.array(d['pose']['rm']), c1=N.array(d['pose']['c']))
        elif op['mode'] == 'template':
            # align using a template-matching angle and location
            vr = GR.rotate_pad_mean(v, angle=N.array(d['angle']), loc_r=N.array(d['loc']))
        else:
            raise ValueError('unknown op["mode"]: %s' % op['mode'])
        if mat is None:
            mat = N.zeros((len(dj), vr.size))
        mat[i, :] = vr.flatten()
    if 'PCA' in op:
        import aitom.tomominer.dimension_reduction.empca as drempca
        pca = drempca.empca(data=mat, weights=N.ones(mat.shape), nvec=op['PCA']['n_dims'], niter=op['PCA']['n_iter'])
        mat_km = pca.coeff
    else:
        mat_km = mat
    # NOTE: the n_jobs argument was deprecated in scikit-learn 0.23 and removed
    # in 1.0; drop it (or pin an older scikit-learn) on recent versions.
    km = SC.KMeans(n_clusters=op['kmeans']['cluster num'], n_init=op['kmeans']['n_init'],
                   n_jobs=op['kmeans'].get('n_jobs', -1),
                   verbose=op['kmeans']['verbose'])
    lbl = km.fit_predict(mat_km)
    dj_new = []
    for (i, d) in enumerate(dj):
        dn = {}
        if 'id' in d:
            dn['id'] = d['id']
        dn['subtomogram'] = d['subtomogram']
        dn['cluster_label'] = int(lbl[i])
        dj_new.append(dn)
    op['output data json file'] = os.path.abspath(op['output data json file'])
    if not os.path.isdir(os.path.dirname(op['output data json file'])):
        os.makedirs(os.path.dirname(op['output data json file']))
    with open(op['output data json file'], 'w') as f:
        json.dump(dj_new, f, indent=2)
    clus_dir = os.path.join(op['out dir'], 'vol-avg')
    if not os.path.isdir(clus_dir):
        os.makedirs(clus_dir)
    clus_stat = []
    for l in set(lbl.tolist()):
        avg_file_name = os.path.abspath(os.path.join(clus_dir, '%03d.mrc' % (l,)))
        # per-cluster average of the member subtomograms
        v_avg = mat[lbl == l, :].mean(axis=0).reshape(v.shape)
        IF.put_mrc(mrc=v_avg, path=avg_file_name, overwrite=True)
        clus_stat.append(
            {'cluster_label': l, 'size': int((lbl == l).sum()), 'subtomogram': avg_file_name})
    op['output cluster stat file'] = os.path.abspath(op['output cluster stat file'])
    if not os.path.isdir(os.path.dirname(op['output cluster stat file'])):
        os.makedirs(os.path.dirname(op['output cluster stat file']))
    with open(op['output cluster stat file'], 'w') as f:
        json.dump(clus_stat, f, indent=2)
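
A minimal op configuration for process(), for illustration only: the keys are
exactly those the function reads above, but all values are placeholder
assumptions.

op_example = {
    'input data json file': './data.json',
    'output data json file': './out/data_cluster.json',
    'output cluster stat file': './out/cluster_stat.json',
    'out dir': './out',
    'mode': 'pose',  # or 'template'
    'kmeans': {'cluster num': 10, 'n_init': 10, 'verbose': 0},
    # optional: 'test': {'sample_num': 100} runs on a random subsample
    # optional: 'PCA': {'n_dims': 50, 'n_iter': 10} reduces dimension first
}
process(op_example)
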
Example #2
# Imports reconstructed so the example is self-contained. Note that PMPG (an
# AITom pattern-mining / pursuit module) and average() are used below but not
# defined in this snippet; their imports live elsewhere in the original script.
import copy
import json
import os
import pickle

import numpy as N

import aitom.io.file as IF


def main():
    with open('aligned_refine__op.json') as f:
        op = json.load(f)
    out_dir = os.path.abspath(op['out_dir'])
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    with open(op['data_file']) as f:
        dj = json.load(f)
    # resolve subtomogram and mask paths relative to the data file's directory
    for d in dj:
        if not os.path.isabs(d['subtomogram']):
            d['subtomogram'] = os.path.abspath(
                os.path.join(os.path.dirname(op['data_file']),
                             d['subtomogram']))
        if not os.path.isabs(d['mask']):
            d['mask'] = os.path.abspath(
                os.path.join(os.path.dirname(op['data_file']), d['mask']))
    pmpg_file = os.path.join(out_dir, 'pmpg.pickle')
    if os.path.isfile(pmpg_file):
        print('loading existing result file', pmpg_file)
        with open(pmpg_file, 'rb') as f:
            pmpg = pickle.load(f)
    else:
        pmpg_dp_op = {}
        pmpg_ga_op = copy.deepcopy(op['genetic_algorithm'])
        pmpg_ga_op['sum_min'] = op['min_sample_num']
        pmpg_ga_op['evaluate']['ssnr']['mask_sum_threshold'] = op['min_sample_num']
        pmpg = {}
        pmpg['dp'] = PMPG.data_prepare(dj=dj, op=pmpg_dp_op)
        # score the full, unselected set as a baseline
        pmpg['full_set'] = {}
        pmpg['full_set']['evaluate'] = [
            PMPG.ga_evaluate__single(l=N.ones(len(dj)),
                                     stat=pmpg['dp'],
                                     op=pmpg_ga_op['evaluate'])
        ]
        PMPG.ga_evaluate__scoring(pmpg['full_set']['evaluate'],
                                  op=pmpg_ga_op['evaluate']['scoring'])
        # genetic-algorithm selection of a consistent subset of subtomograms
        pmpg['best'] = PMPG.ga(stat=pmpg['dp'], op=pmpg_ga_op)
        pmpg['dj'] = [dj[i] for i in range(len(dj))
                      if pmpg['best']['p'][0, i] == 1]
        del pmpg['dp']
        with open(pmpg_file, 'wb') as f:
            pickle.dump(pmpg, f, protocol=-1)
    print('score for the full set of', len(dj), 'subtomograms:',
          pmpg['full_set']['evaluate'][0]['score'])
    print('score for the', pmpg['best']['p'][0, :].sum(),
          'selected subtomograms:', pmpg['best']['e'][0]['score'])
    # average the selected subtomograms and save the density and mask maps
    avg_re = average(dj=pmpg['dj'], mask_count_threshold=op['min_sample_num'])
    avg_dir = os.path.join(out_dir, 'avg')
    if not os.path.isdir(avg_dir):
        os.makedirs(avg_dir)
    IF.put_mrc(avg_re['v'], os.path.join(avg_dir, 'vol_avg.mrc'))
    IF.put_mrc(avg_re['m'], os.path.join(avg_dir, 'mask_avg.mrc'))
    with open(os.path.join(out_dir, 'data_selected.json'), 'w') as f:
        json.dump(pmpg['dj'], f, indent=2)
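
For reference, a sketch of the contents of aligned_refine__op.json, inferred
only from the keys main() reads above; the genetic_algorithm sub-options are
mostly opaque in this snippet, so treat this as an assumed skeleton rather
than the full schema:

op_sketch = {
    'out_dir': './out',
    'data_file': './data.json',
    'min_sample_num': 100,
    'genetic_algorithm': {
        'evaluate': {
            'ssnr': {},     # options beyond mask_sum_threshold not visible here
            'scoring': {},  # scoring options not visible here
        },
    },
}
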
# Imports reconstructed so the example is self-contained; the aitom module
# paths below are assumptions based on the AITom tutorials and may need
# adjusting to your installed version.
import numpy as N
import aitom.model.util as MU
import aitom.geometry.ang_loc as GAL
import aitom.geometry.rotate as GR
import aitom.simulation.reconstruction.reconstruction__simple_convolution as TSRSC
import aitom.io.file as TIF

v = MU.generate_toy_model(dim_siz=64)  # generate a pseudo density map
print(v.shape)

# randomly rotate and translate v
loc_proportion = 0.1
loc_max = N.array(v.shape, dtype=float) * loc_proportion
angle = GAL.random_rotation_angle_zyz()
loc_r = (N.random.random(3) - 0.5) * loc_max
vr = GR.rotate(v, angle=angle, loc_r=loc_r, default_val=0.0)

# reconstruction options; the parameter values below are illustrative
# assumptions (missing-wedge angle in degrees, target SNR, CTF settings)
op = {'model': {'missing_wedge_angle': 30, 'SNR': 0.05},
      'ctf': {'pix_size': 1.0, 'Dz': -5.0, 'voltage': 300, 'Cs': 2.0, 'sigma': 0.4}}

# generate simulated subtomogram vb from v
vb = TSRSC.do_reconstruction(vr, op, verbose=True)
print('vb', 'mean', vb.mean(), 'std', vb.std(), 'var', vb.var())

# save v and vb as 3D grey scale images
TIF.put_mrc(vb, '/tmp/vb.mrc', overwrite=True)
TIF.put_mrc(v, '/tmp/v.mrc', overwrite=True)

# save images of the slices of the corresponding 3D images for visual inspection
import aitom.image.io as IIO
import aitom.tomominer.image.vol.util as TIVU
IIO.save_png(TIVU.cub_img(vb)['im'], "/tmp/vb.png")
IIO.save_png(TIVU.cub_img(v)['im'], "/tmp/v.png")

if True:
    # verify the correctness of SNR estimation
    vb_rep = TSRSC.do_reconstruction(vr, op, verbose=True)

    import scipy.stats as SS
    # Pearson correlation between the two independent reconstructions
    vb_corr = SS.pearsonr(vb.flatten(), vb_rep.flatten())[0]
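    # The correlation of two independent noisy copies of the same signal
    # relates to the SNR as SNR = corr / (1 - corr), a standard estimate in
    # cryo-EM; a minimal sketch of the remaining step, assuming the op dict
    # defined above holds the simulated ground-truth SNR:
    vb_snr = vb_corr / (1.0 - vb_corr)
    print('estimated SNR', vb_snr, '; simulated SNR', op['model']['SNR'])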