def output_coords(bn,sz,base_dir,outdir,args): '''Converts particle maps into coordinates and saves coordinates with the png file''' pmap,imdisp,micro = args microname = ft.file_only(micro) outmicrodir = os.path.join(outdir,ft.remove_prefix(os.path.dirname(micro),base_dir)[1:]) ft.mkdir_assure(outmicrodir) # particle coordinates in the original micrograph coordinates coords = np.column_stack(np.where(pmap > cfg.MIN_CLEARANCE)) if coords.shape[0] < cfg.MIN_PART_PER_MICRO: print "Too few particles detected %d, dropping micrograph %s ..." % (coords.shape[0],microname) # coords = np.zeros((0,2),dtype=np.float32) return 0 szpmap = np.int32(pmap.shape) szdisp = np.int32(imdisp.shape[:2]) szbn = utils.np.int32(np.round(sz / bn)) # im coordinate system for display coords_disp = adjust_coodinates(coords, szpmap, szdisp, cfg.STRIDES[0]) # original micrograph coordinate system coords_orig = adjust_coodinates(coords, szpmap, szbn, cfg.STRIDES[0])*bn starname = os.path.join(outmicrodir, microname + '_manualpick.star') save_coords_in_star(starname, coords_orig) #### SAVE figure as well ###### figname = os.path.join(outmicrodir, microname + '.png') imrgb = plot_coord_rgb(np.squeeze(imdisp), {'particles': coords_disp}, cfg.PART_D_PIXELS, cfg.CIRCLE_WIDTH) cv2.imwrite(figname,imrgb) # return number of particles return coords_orig.shape[0]
def test_example(self, provider):
    '''Renders N_EXAMPLES ground-truth frames from the tfrecord and saves
    them as images under <tfrecord_dir>/example_ground_truth.

    provider - dataset provider exposing get(keys) that returns TF tensors
               for the requested feature keys (slim-style; assumed - TODO
               confirm against caller)
    '''
    print "Generating example frames from the tfrecord ..."
    # feature-map size: picking window downsampled by the first stride
    featsz = (cfg.PICK_WIN // cfg.STRIDES[0], ) * 2
    out_dir = os.path.join(self.tfrecord_dir, 'example_ground_truth')
    # start from a clean output directory
    ft.rmtree_assure(out_dir)
    ft.mkdir_assure(out_dir)
    # image, one coordinate set per class, plus displacement features
    keys = ['image'] + self.classes + ['dxyidxs', 'dxy']
    data = provider.get(keys)
    # NOTE(review): the session is created before the inner Graph context,
    # so sess is bound to the graph that data was built on - the inner
    # tf.Graph() appears to have no effect; confirm before refactoring
    with tf.Session().as_default() as sess:
        with tf.Graph().as_default():
            with tf.device('/device:CPU:0'):
                coord = tf.train.Coordinator()
                tf.train.start_queue_runners(coord=coord)
                for i in range(N_EXAMPLES):
                    # pull one example and map evaluated arrays back to keys
                    d = dict(zip(keys, sess.run(data)))
                    im = d['image']
                    # flat feature indices -> image-space coordinates
                    dd = dict((k, unravel_coords_append(d[k], featsz) * cfg.STRIDES[0])
                              for k in self.classes)
                    fig = plot_class_coords(im, dd, cfg.PART_D_PIXELS)
                    # save the resulting graph
                    fname = os.path.join(out_dir, 'example_%d' % i)
                    print "saving example %s, %d out of %d" % (fname, i, N_EXAMPLES)
                    savefig(fig, fname)
                    # plt.gcf().set_figheight(10.0)
                    # plt.gcf().set_figwidth(10.0)
                    # fig = plt.gcf()
                    # fig.subplots_adjust(wspace=.1, hspace=0.2, left=0.03, right=0.98, bottom=0.05, top=0.93)
                    # fig.savefig(fname)
                    plt.close(fig)
def mrcs2pngs(fnames, label_dir, split, nlim):
    '''Writes frames of mrc stacks as individual 16-bit grayscale pngs.

    fnames    - list of mrc stack filenames
    label_dir - output directory for the pngs
    split     - split name, used only in the progress-bar title
    nlim      - maximum total number of frames to write

    Writes at most nlim frames across all stacks; output files are named
    <stack>_<frame_index>.png.
    '''
    ft.mkdir_assure(label_dir)
    # count the frames available across all stacks, capped by nlim
    nfiles = 0
    for f in fnames:
        nfiles += mrc.shape(f)[0]
    nfiles = int(min(nlim, nfiles))
    bar = pyprind.ProgBar(nfiles, stream=1,
                          title='Writing %d %s pngs into %s ...' % (nfiles, split, label_dir))
    count = 0
    for f in fnames:
        # stop before loading further stacks once the limit is reached
        # (the original only broke out of the inner frame loop, so every
        # remaining stack was still loaded from disk for nothing)
        if count == nfiles:
            break
        ims = mrc.load(f)
        for idx in range(ims.shape[0]):
            if count == nfiles:
                break
            png.from_array(float2uint16(ims[idx]), 'L').save(
                os.path.join(label_dir, ft.file_only(f) + '_%d.png' % idx))
            count += 1
            bar.update()
def convertmrcs2pngs(data_in_dir, pngdir): ft.mkdir_assure(pngdir) mrcnames = list_dirtree(data_in_dir, '.mrc') #mrcnames = [os.path.join(mrcsdict[key],key) for key in mrcsdict] # length of input path inlen = len(data_in_dir) # create png dir struct subdirs = set([os.path.dirname(mrcname)[inlen:] for mrcname in mrcnames]) for dir in subdirs: ft.mkdir_assure(os.path.join(pngdir, dir)) # create png filename list pngnames = [ os.path.join(pngdir, ft.replace_ext(name[inlen:], '')) for name in mrcnames ] num_cores = multiprocessing.cpu_count() mrc2pngs(pngnames[0], mrcnames[0]) print "Converting %d mrcs from %s, to pngs in %s, using %d cores" % \ (len(mrcnames),data_in_dir,pngdir,num_cores) Parallel(n_jobs=2 * num_cores)( delayed(mrc2pngs)(pngname, mrcname) for pngname, mrcname in zip(pngnames, mrcnames))
def __init__(self, data_in_dir, data_out_dir):
    '''Collects per-class particle coordinates from relion selection jobs.

    data_in_dir  - directory whose immediate subdirectories each define one
                   particle class; each class subdirectory contains symlinks
                   to a relion selection job with particles.star files
    data_out_dir - output directory passed through to the base converter

    Builds self.allmicros: per-micrograph coordinates per class with
    cross-class overlaps removed; optionally resizes the micrographs.
    '''
    super(ParticleCoords2TFRecord, self).__init__(data_in_dir, data_out_dir)
    # resized micrographs live next to data_in_dir, one level up
    self._resized_micros_dir = os.path.join(
        ft.updirs(data_in_dir + '/', 1), 'ResizedMicrographs')
    # each subdirectory in data_in_dir corresponds to a separate class
    # each class subdirectory contains symlink to a selection relion job
    topdirs = np.sort(os.walk(data_in_dir).next()[1])  # Python 2 iterator API
    class2label = {}  # class id (dir name) -> integer label
    label2class = {}  # integer label -> class id
    allmicros = {}    # micrograph -> per-class coordinate data
    for d in range(len(topdirs)):
        cid = topdirs[d].tostring()
        tprint("Listing directory \'%s\' ..." % cid)
        class2label.update({cid: d})
        label2class.update({d: cid})
        # get all star files with particle coordinates for this class
        stars = list_dirtree(os.path.join(data_in_dir, cid), 'particles.star')
        self.add_class_coords(allmicros, stars, cid)
    # initialize box class keys
    self.classes = class2label.keys()
    self.init_feature_keys(self.classes)
    # here allmicros has particle coordinates for each class per micrograph
    tprint("Removing particle overlaping in different classes ...")
    allmicros = ParticleCoords2TFRecord.remove_class_overlap(allmicros)
    # count all particles per class label
    classcnt = np.zeros(len(class2label))
    for micro in allmicros:
        for cls in allmicros[micro]['coords']:
            classcnt[class2label[cls]] += len(
                allmicros[micro]['coords'][cls])
    # print totals
    # print "Total micrographs %d" % len(allmicros)
    for label in label2class:
        print 'Total coordinates in %s \t = %d' % (label2class[label],
                                                   classcnt[label])
    if cfg.RESIZE_MICROS:
        print "Resizing %d micrographs ..." % len(allmicros)
        # ft.rmtree_assure(self._resized_micros_dir)
        ft.mkdir_assure(self._resized_micros_dir)
        f = functools.partial(ParticleCoords2TFRecord.preprocess_micro)
        # # Sequential
        # for micro in allmicros:
        #     f([micro,allmicros[micro]])
        # Parallel
        num_proc = 2 * mp.cpu_count()
        pool = ThreadPool(num_proc)
        pool.map(f, zip(allmicros.keys(), [allmicros[m] for m in allmicros]))
    # save updated micrographs
    self.allmicros = allmicros
# Script: builds a junk-detection training set - splits good/bad mrc
# movies into pngs and converts them into sharded tfrecords.
from fileio import filetools as ft
from utils import tprint
# import numpy as np
# from fileio import mrc
# from myplotlib import imshow,clf
# import os

# number of tfrecord shards per split
_NUM_SHARDS = 5

data_png_dir = '/jasper/data/train_data_sets/junk_detection/png/'
tfrecord_dir = '/jasper/data/train_data_sets/junk_detection/tfrecord/'

# start from an empty png directory
tprint('Cleaning %s ...' % data_png_dir)
ft.rmtree_assure(data_png_dir)
ft.mkdir_assure(data_png_dir)

# Split Junk mrc files into pngs
pour_mrcs2pngs('/jasper/result/PKM2_WT/Extract/bad_6840_101/Movies/', data_png_dir, 'junk')
# Split Good mrc files into pngs
# (use_only caps the good class to match the junk class size - TODO confirm)
pour_mrcs2pngs('/jasper/result/PKM2_WT/Extract/good_78505/Movies/', data_png_dir, 'good', use_only=6840)

# start from an empty tfrecord directory
tprint('Cleaning %s ...' % tfrecord_dir)
ft.rmtree_assure(tfrecord_dir)
ft.mkdir_assure(tfrecord_dir)

# convert the train/test png trees into sharded tfrecords
convert_dir_classes(data_png_dir + '/train', tfrecord_dir, 'train', _NUM_SHARDS)
convert_dir_classes(data_png_dir + '/test', tfrecord_dir, 'test', _NUM_SHARDS)
tprint('Finished TFRecord Conversions!')
# Picking setup: derive binning from particle diameter, create the
# dataflow and build the inference graph. (Continues past this chunk.)
outdir = os.path.join(base_dir, 'cnnpick/')
psize = path2psize(ctfstar, 'CtfFind')
# particle diameter in pixels
D = path2part_diameter(ctfstar) / psize
# calculate micrographs binning factor
bn = D / cfg.PART_D_PIXELS
####### Read Relion jobs info and prepare picking params ########
# create dataflow
ds, shape = init_dataflow(ctfstar, FLAGS.batch_size)
# outmicrodir = os.path.join(outdir,basedir)
# initialize output directory
ft.rmtree_assure(outdir)
ft.mkdir_assure(outdir)
##################################################################
tprint("Picking from ~%d micrographs with CTF better than %.2fA resolution" %
       (ds.size() * FLAGS.batch_size, cfg.CTF_RES_THRESH))
with tf.Graph().as_default() as g:
    model = Model.create_instance(FLAGS.model_path, 'test', FLAGS.tfrecord_dir)
    classes = model._dataset.example_meta['classes']
    cleanidx = classes.index('clean')
    with g.device(device):
        with tf.name_scope('inputs'):
            # batch of single-channel micrographs of variable spatial size
            images = tf.placeholder(tf.float32,
                                    shape=(FLAGS.batch_size, None, None, 1))
            data = {'image': images}
        # create model
        logits, end_points = model.network(data, is_training=False)