Beispiel #1
0
def output_coords(bn,sz,base_dir,outdir,args):
    '''Converts particle maps into coordinates and saves coordinates with the png file'''
    pmap,imdisp,micro = args
    microname   = ft.file_only(micro)
    outmicrodir = os.path.join(outdir,ft.remove_prefix(os.path.dirname(micro),base_dir)[1:])
    ft.mkdir_assure(outmicrodir)

    # particle coordinates in the original micrograph coordinates
    coords = np.column_stack(np.where(pmap > cfg.MIN_CLEARANCE))
    if coords.shape[0] < cfg.MIN_PART_PER_MICRO:
        print "Too few particles detected %d, dropping micrograph %s ..." % (coords.shape[0],microname)
        # coords = np.zeros((0,2),dtype=np.float32)
        return 0
    szpmap = np.int32(pmap.shape)
    szdisp = np.int32(imdisp.shape[:2])
    szbn   = utils.np.int32(np.round(sz / bn))
    # im coordinate system for display
    coords_disp = adjust_coodinates(coords, szpmap, szdisp, cfg.STRIDES[0])
    # original micrograph coordinate system
    coords_orig = adjust_coodinates(coords, szpmap, szbn, cfg.STRIDES[0])*bn
    starname    = os.path.join(outmicrodir, microname + '_manualpick.star')
    save_coords_in_star(starname, coords_orig)
    #### SAVE figure as well ######
    figname = os.path.join(outmicrodir, microname + '.png')
    imrgb   = plot_coord_rgb(np.squeeze(imdisp), {'particles': coords_disp}, cfg.PART_D_PIXELS, cfg.CIRCLE_WIDTH)
    cv2.imwrite(figname,imrgb)
    # return number of particles
    return coords_orig.shape[0]
Beispiel #2
0
 def test_example(self, provider):
     """Render N_EXAMPLES ground-truth frames from the tfrecord to png files.

     Reads image + per-class coordinate features from the dataset provider,
     unravels the flat coordinate indices back to pixel positions, plots
     them over the image, and saves each figure under
     '<tfrecord_dir>/example_ground_truth'.

     Args:
         provider: dataset provider with a get(keys) method returning
                   tensors for the requested feature keys.
     """
     print "Generating example frames from the tfrecord ..."
     # feature-map size: picking window downscaled by the first stride, square
     featsz = (cfg.PICK_WIN // cfg.STRIDES[0], ) * 2
     out_dir = os.path.join(self.tfrecord_dir, 'example_ground_truth')
     # start from an empty output directory
     ft.rmtree_assure(out_dir)
     ft.mkdir_assure(out_dir)
     keys = ['image'] + self.classes + ['dxyidxs', 'dxy']
     data = provider.get(keys)
     # NOTE(review): the Session is created before the new Graph is set as
     # default, so `data`'s ops (created above) belong to the outer default
     # graph that the session targets - confirm this ordering is intentional.
     with tf.Session().as_default() as sess:
         with tf.Graph().as_default():
             with tf.device('/device:CPU:0'):
                 coord = tf.train.Coordinator()
                 tf.train.start_queue_runners(coord=coord)
                 for i in range(N_EXAMPLES):
                     # pull one example and map feature keys to their values
                     d = dict(zip(keys, sess.run(data)))
                     im = d['image']
                     # flat indices -> pixel coordinates, scaled to image space
                     dd = dict((k, unravel_coords_append(d[k], featsz) *
                                cfg.STRIDES[0]) for k in self.classes)
                     fig = plot_class_coords(im, dd, cfg.PART_D_PIXELS)
                     # save the resulting graph
                     fname = os.path.join(out_dir, 'example_%d' % i)
                     print "saving example %s, %d out of %d" % (fname, i,
                                                                N_EXAMPLES)
                     savefig(fig, fname)
                     # plt.gcf().set_figheight(10.0)
                     # plt.gcf().set_figwidth(10.0)
                     # fig = plt.gcf()
                     # fig.subplots_adjust(wspace=.1, hspace=0.2, left=0.03, right=0.98, bottom=0.05, top=0.93)
                     # fig.savefig(fname)
                     # release figure memory between examples
                     plt.close(fig)
Beispiel #3
0
def mrcs2pngs(fnames, label_dir, split, nlim):
    '''Writes up to nlim slices from mrc stacks as grayscale png files.

    Args:
        fnames: list of mrc stack filenames.
        label_dir: output directory for the pngs (created if missing).
        split: split name (e.g. 'train'/'test'), used only in the progress title.
        nlim: maximum number of pngs to write.
    '''
    ft.mkdir_assure(label_dir)
    # total number of slices available across all stacks
    nfiles = 0
    for f in fnames:
        nfiles += mrc.shape(f)[0]
    nfiles = int(min(nlim, nfiles))
    bar = pyprind.ProgBar(nfiles,
                          stream=1,
                          title='Writing %d %s pngs into %s ...' %
                          (nfiles, split, label_dir))
    count = 0
    for f in fnames:
        # stop before loading further stacks once the limit is reached
        # (previously only the inner loop broke, so remaining stacks were
        # still loaded into memory for nothing)
        if count == nfiles:
            break
        ims = mrc.load(f)
        for idx in range(ims.shape[0]):
            if count == nfiles: break
            png.from_array(float2uint16(ims[idx]), 'L').save(
                os.path.join(label_dir,
                             ft.file_only(f) + '_%d.png' % idx))
            count += 1
            bar.update()
Beispiel #4
0
def convertmrcs2pngs(data_in_dir, pngdir):
    ft.mkdir_assure(pngdir)
    mrcnames = list_dirtree(data_in_dir, '.mrc')
    #mrcnames = [os.path.join(mrcsdict[key],key) for key in mrcsdict]
    # length of input path
    inlen = len(data_in_dir)
    # create png dir struct
    subdirs = set([os.path.dirname(mrcname)[inlen:] for mrcname in mrcnames])
    for dir in subdirs:
        ft.mkdir_assure(os.path.join(pngdir, dir))
    # create png filename list
    pngnames = [
        os.path.join(pngdir, ft.replace_ext(name[inlen:], ''))
        for name in mrcnames
    ]
    num_cores = multiprocessing.cpu_count()
    mrc2pngs(pngnames[0], mrcnames[0])
    print "Converting %d mrcs from %s, to pngs in %s, using %d cores" % \
          (len(mrcnames),data_in_dir,pngdir,num_cores)
    Parallel(n_jobs=2 * num_cores)(
        delayed(mrc2pngs)(pngname, mrcname)
        for pngname, mrcname in zip(pngnames, mrcnames))
Beispiel #5
0
    def __init__(self, data_in_dir, data_out_dir):
        """Collects per-class particle coordinates from Relion star files.

        Each subdirectory of data_in_dir is treated as one particle class;
        coordinates are gathered per micrograph, cross-class overlaps are
        removed, and (optionally) micrographs are resized in parallel.

        Args:
            data_in_dir: root dir whose subdirectories are the classes,
                each containing symlinks to Relion selection jobs.
            data_out_dir: output directory passed to the base class.
        """

        super(ParticleCoords2TFRecord, self).__init__(data_in_dir,
                                                      data_out_dir)

        # resized micrographs are stored next to data_in_dir
        self._resized_micros_dir = os.path.join(
            ft.updirs(data_in_dir + '/', 1), 'ResizedMicrographs')

        # each subdirectory in data_in_dir corresponds to a separate class
        # each class subdirectory contains symlink to a selection relion job
        # os.walk(...).next()[1] lists the immediate subdirectories (Python 2)
        topdirs = np.sort(os.walk(data_in_dir).next()[1])
        class2label = {}
        label2class = {}
        allmicros = {}
        for d in range(len(topdirs)):
            # NOTE(review): .tostring() turns the numpy string element into a
            # plain Python-2 str - verify this survives any numpy upgrade
            cid = topdirs[d].tostring()
            tprint("Listing directory \'%s\' ..." % cid)
            class2label.update({cid: d})
            label2class.update({d: cid})
            # get all star files with particle coordinates for this class
            stars = list_dirtree(os.path.join(data_in_dir, cid),
                                 'particles.star')
            self.add_class_coords(allmicros, stars, cid)

        # initialize box class keys
        self.classes = class2label.keys()
        self.init_feature_keys(self.classes)
        # here allmicros has particle coordinates for each class per micrograph
        tprint("Removing particle overlaping in different classes ...")
        allmicros = ParticleCoords2TFRecord.remove_class_overlap(allmicros)

        # count all particles per class label
        classcnt = np.zeros(len(class2label))
        for micro in allmicros:
            for cls in allmicros[micro]['coords']:
                classcnt[class2label[cls]] += len(
                    allmicros[micro]['coords'][cls])
        # print totals
        # print "Total micrographs %d" % len(allmicros)
        for label in label2class:
            print 'Total coordinates in %s \t = %d' % (label2class[label],
                                                       classcnt[label])

        if cfg.RESIZE_MICROS:
            print "Resizing %d micrographs ..." % len(allmicros)
            # ft.rmtree_assure(self._resized_micros_dir)
            ft.mkdir_assure(self._resized_micros_dir)
            f = functools.partial(ParticleCoords2TFRecord.preprocess_micro)

            # # Sequential
            # for micro in allmicros:
            #      f([micro,allmicros[micro]])

            # Parallel: resizing is I/O-bound, so oversubscribe with threads
            num_proc = 2 * mp.cpu_count()
            pool = ThreadPool(num_proc)
            pool.map(f, zip(allmicros.keys(),
                            [allmicros[m] for m in allmicros]))

        # save updated micrographs
        self.allmicros = allmicros
Beispiel #6
0
from   fileio import filetools as ft
from   utils import tprint

# import numpy as np
# from   fileio import mrc
# from   myplotlib import imshow,clf
# import os

# number of tfrecord shards per split
_NUM_SHARDS = 5

# intermediate png dump and final tfrecord output locations
data_png_dir = '/jasper/data/train_data_sets/junk_detection/png/'
tfrecord_dir = '/jasper/data/train_data_sets/junk_detection/tfrecord/'

# start from an empty png directory
tprint('Cleaning %s ...' % data_png_dir)
ft.rmtree_assure(data_png_dir)
ft.mkdir_assure(data_png_dir)

# Split Junk mrc files into pngs
pour_mrcs2pngs('/jasper/result/PKM2_WT/Extract/bad_6840_101/Movies/',data_png_dir,'junk')
# Split Good mrc files into pngs
# use_only caps the good class at the same count as the junk class
pour_mrcs2pngs('/jasper/result/PKM2_WT/Extract/good_78505/Movies/',data_png_dir,'good',use_only=6840)

# start from an empty tfrecord directory
tprint('Cleaning %s ...' % tfrecord_dir)
ft.rmtree_assure(tfrecord_dir)
ft.mkdir_assure(tfrecord_dir)

# convert both splits to sharded tfrecords
convert_dir_classes(data_png_dir+'/train',tfrecord_dir,'train',_NUM_SHARDS)
convert_dir_classes(data_png_dir+'/test',tfrecord_dir,'test',_NUM_SHARDS)

tprint('Finished TFRecord Conversions!')
Beispiel #7
0
# all picking results go under the 'cnnpick' subdirectory of base_dir
outdir  = os.path.join(base_dir,'cnnpick/')

# pixel size read from the CtfFind job's star file
psize = path2psize(ctfstar, 'CtfFind')
# particle diameter in pixels
D = path2part_diameter(ctfstar) / psize
# calculate micrographs binning factor so particles match the trained size
bn = D / cfg.PART_D_PIXELS

####### Read Relion jobs info and prepare picking params ########
# create dataflow
ds,shape = init_dataflow(ctfstar,FLAGS.batch_size)
# outmicrodir = os.path.join(outdir,basedir)

# initialize output directory (remove any previous results)
ft.rmtree_assure(outdir)
ft.mkdir_assure(outdir)

##################################################################
tprint("Picking from ~%d micrographs with CTF better than %.2fA resolution" % (ds.size()*FLAGS.batch_size,cfg.CTF_RES_THRESH))
with tf.Graph().as_default() as g:
    # restore the trained model in test mode
    model    = Model.create_instance(FLAGS.model_path, 'test', FLAGS.tfrecord_dir)
    classes  = model._dataset.example_meta['classes']
    cleanidx = classes.index('clean')

    with g.device(device):
        with tf.name_scope('inputs'):
            # variable-size single-channel micrograph batches
            images = tf.placeholder(tf.float32, shape=(FLAGS.batch_size,None, None, 1))
            data   = {'image': images}

    # create model
    logits, end_points = model.network(data, is_training=False)