def read_compressed_eigerdata(mask, filename, beg, end,
                              bad_pixel_threshold=1e15,
                              hot_pixel_threshold=2**30,
                              bad_pixel_low_threshold=0,
                              bad_frame_list=None, with_pickle=False,
                              direct_load_data=False, data_path=None,
                              images_per_file=100):
    '''Read already compressed Eiger data.
    Return
        mask
        avg_img
        imgsum
        bad_frame_list
    '''
    # TODO: should use try/except instead of the with_pickle flag in the future
    CAL = False
    if not with_pickle:
        CAL = True
    else:
        try:
            mask, avg_img, imgsum, bad_frame_list_ = pkl.load(
                open(filename + '.pkl', 'rb'))
        except Exception:
            CAL = True
    if CAL:
        FD = Multifile(filename, beg, end)
        imgsum = np.zeros(FD.end - FD.beg, dtype=np.float64)
        avg_img = np.zeros([FD.md['ncols'], FD.md['nrows']], dtype=np.float64)
        imgsum, bad_frame_list_ = get_each_frame_intensityc(
            FD, sampling=1,
            bad_pixel_threshold=bad_pixel_threshold,
            bad_pixel_low_threshold=bad_pixel_low_threshold,
            hot_pixel_threshold=hot_pixel_threshold,
            plot_=False, bad_frame_list=bad_frame_list)
        avg_img = get_avg_imgc(FD, beg=None, end=None, sampling=1,
                               plot_=False, bad_frame_list=bad_frame_list_)
        FD.FID.close()
    return mask, avg_img, imgsum, bad_frame_list_

def _get_mean_intensity_one_q(FD, sampling, labels):
    mi = np.zeros(int((FD.end - FD.beg) / sampling))
    n = 0
    qind, pixelist = roi.extract_label_indices(labels)
    # reverse lookup: flat detector index -> 1-based position in pixelist
    timg = np.zeros(FD.md['ncols'] * FD.md['nrows'], dtype=np.int32)
    timg[pixelist] = np.arange(1, len(pixelist) + 1)
    # iterate over the sampled frames and sum the ROI pixel values for each
    # frame (normalization by ROI size happens in the caller)
    for i in range(FD.beg, FD.end, sampling):
        (p, v) = FD.rdrawframe(i)
        w = np.where(timg[p])[0]
        pxlist = timg[p[w]] - 1
        # `labels` holds a single ROI, so only bin 1 of the bincount matters
        mi[n] = np.bincount(qind[pxlist], weights=v[w], minlength=2)[1]
        n += 1
    return mi

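# The timg reverse-lookup above (also used by the waterfall and mean-intensity
# routines below) maps a flat detector index to a 1-based position within
# pixelist, so a sparse frame (p, v) can be routed to ROI pixels without
# searching. A minimal sketch with made-up toy values (all names below are
# illustrative, not part of this module):
#
#     pixelist = np.array([3, 7, 8])           # flat indices inside the ROI
#     timg = np.zeros(12, dtype=np.int32)      # 12 = total detector pixels
#     timg[pixelist] = np.arange(1, len(pixelist) + 1)
#     p = np.array([2, 7, 9])                  # one sparse frame: indices...
#     v = np.array([5., 1., 4.])               # ...and photon counts
#     w = np.where(timg[p])[0]                 # -> [1]: only pixel 7 is in the ROI
#     pxlist = timg[p[w]] - 1                  # -> [1]: its position in pixelist
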
def get_time_edge_avg_img(FD, frame_edge, show_progress=True):
    '''YG Dev Nov 14, 2017@CHX
    Get averaged images for the given FD and frame edges.
    Parameters
    ----------
    FD: Multifile class
        compressed file
    frame_edge: np.array, can be created by
        create_time_slice(Nimg, slice_num=3, slice_width=1, edges=None)
        e.g., np.array([[   5,    6],
                        [2502, 2503],
                        [4999, 5000]])
    Return:
        array: (N of frame_edge, averaged image),
        i.e., d[0] gives the first averaged image
    '''
    Nt = len(frame_edge)
    d = np.zeros(Nt, dtype=object)
    for i in range(Nt):
        t1, t2 = frame_edge[i]
        d[i] = get_avg_imgc(FD, beg=t1, end=t2, sampling=1,
                            plot_=False, show_progress=show_progress)
    return d

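# A minimal usage sketch for get_time_edge_avg_img (assuming `FD` is an open
# Multifile with 5000 frames and create_time_slice is importable, as the
# docstring suggests):
#
#     frame_edge = create_time_slice(N=5000, slice_num=3, slice_width=1)
#     avg_imgs = get_time_edge_avg_img(FD, frame_edge, show_progress=False)
#     first_avg = avg_imgs[0]   # average over the first frame slice
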
def get_bin_frame(self):
    FD = self.FD
    self.frames = np.zeros(
        [FD.md['ncols'], FD.md['nrows'], len(self.time_edge)])
    for n in tqdm(range(len(self.time_edge))):
        t1, t2 = self.time_edge[n]
        self.frames[:, :, n] = get_avg_imgc(FD, beg=t1, end=t2, sampling=1,
                                            plot_=False, show_progress=False)

def cal_waterfallc(FD, labeled_array, qindex=1, bin_waterfall=False,
                   waterfall_roi_size=None, save=False, *argv, **kwargs):
    """Compute the intensity of each pixel in one ROI for every frame in the
    compressed file (FD), i.e., the data for a waterfall plot

    Parameters
    ----------
    FD: Multifile class
        compressed file
    labeled_array : array
        labeled array; 0 is background.
        Each ROI is represented by a nonzero integer. It is not required that
        the ROI labels are contiguous
    qindex : int
        The ROI label to use; qindex=1 gives the first ring in SAXS geometry.
        NOTE: qindex=0 is the non-photon (background) pixels.
    bin_waterfall : if True, will bin the waterfall along the y-axis
    waterfall_roi_size : the size of the waterfall ROI, (x-size, y-size);
        if binning, will bin along y
    save : if True, save the waterfall (requires `path` and `uid` in kwargs)

    Returns
    -------
    waterfall : array
        The pixel intensities of the selected ROI for all frames.
        Dimensions:
            len(waterfall) == number of frames
            len(waterfall[0]) == number of pixels in the ROI
    """
    sampling = 1
    labeled_array_ = np.array(labeled_array == qindex, dtype=np.int64)
    qind, pixelist = roi.extract_label_indices(labeled_array_)
    if labeled_array_.shape != (FD.md['ncols'], FD.md['nrows']):
        raise ValueError(
            " `image` shape (%d, %d) in FD is not equal to the labeled_array"
            " shape (%d, %d)" % (FD.md['ncols'], FD.md['nrows'],
                                 labeled_array_.shape[0],
                                 labeled_array_.shape[1]))
    # pre-allocate an array for performance
    watf = np.zeros([int((FD.end - FD.beg) / sampling), len(qind)])
    # reverse lookup: flat detector index -> 1-based position in pixelist
    timg = np.zeros(FD.md['ncols'] * FD.md['nrows'], dtype=np.int32)
    timg[pixelist] = np.arange(1, len(pixelist) + 1)
    n = 0
    for i in tqdm(range(FD.beg, FD.end, sampling),
                  desc='Get waterfall for q index=%s' % qindex):
        (p, v) = FD.rdrawframe(i)
        w = np.where(timg[p])[0]
        pxlist = timg[p[w]] - 1
        watf[n][pxlist] = v[w]
        n += 1
    if bin_waterfall:
        watf_ = watf.copy()
        watf = np.zeros([watf_.shape[0], waterfall_roi_size[0]])
        for i in range(waterfall_roi_size[1]):
            watf += watf_[:, waterfall_roi_size[0] * i:
                          waterfall_roi_size[0] * (i + 1)]
        # average over the y strips that were summed above
        watf /= waterfall_roi_size[1]
    if save:
        path = kwargs['path']
        uid = kwargs['uid']
        np.save(path + '%s_waterfall' % uid, watf)
    return watf

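# A minimal usage sketch for cal_waterfallc (assuming `FD` is an open
# Multifile and `roi_mask` is a labeled ROI array; names are illustrative):
#
#     watf = cal_waterfallc(FD, roi_mask, qindex=1)
#     # watf.shape == (number of frames, number of pixels with label 1)
#     # saving requires path/uid keyword arguments, e.g.:
#     # cal_waterfallc(FD, roi_mask, qindex=1, save=True, path=..., uid=...)
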
def mean_intensityc(FD, labeled_array, sampling=1, index=None):
    """Compute the mean intensity for each ROI in the compressed file (FD)

    Parameters
    ----------
    FD: Multifile class
        compressed file
    labeled_array : array
        labeled array; 0 is background.
        Each ROI is represented by a nonzero integer. It is not required that
        the ROI labels are contiguous
    index : int, list, optional
        The ROI's to use. If None, this function will extract averages for all
        ROIs

    Returns
    -------
    mean_intensity : array
        The mean intensity of each ROI for all `images`
        Dimensions:
            len(mean_intensity) == number of sampled frames
            len(mean_intensity[0]) == len(index)
    index : list
        The labels for each element of the `mean_intensity` list
    """
    qind, pixelist = roi.extract_label_indices(labeled_array)
    if labeled_array.shape != (FD.md['ncols'], FD.md['nrows']):
        raise ValueError(
            " `image` shape (%d, %d) in FD is not equal to the labeled_array"
            " shape (%d, %d)" % (FD.md['ncols'], FD.md['nrows'],
                                 labeled_array.shape[0],
                                 labeled_array.shape[1]))
    # handle various input for `index`
    if index is None:
        index = list(np.unique(labeled_array))
        index.remove(0)
    else:
        try:
            len(index)
        except TypeError:
            index = [index]
    index = np.array(index)
    # keep only the pixels belonging to the requested ROI labels and
    # renumber those labels to 1..len(index)
    good_ind = np.zeros(max(qind), dtype=np.int32)
    good_ind[index - 1] = np.arange(len(index)) + 1
    w = np.where(good_ind[qind - 1])[0]
    qind = good_ind[qind[w] - 1]
    pixelist = pixelist[w]
    # pre-allocate an array for performance
    mean_intensity = np.zeros([int((FD.end - FD.beg) / sampling), len(index)])
    # reverse lookup: flat detector index -> 1-based position in pixelist
    timg = np.zeros(FD.md['ncols'] * FD.md['nrows'], dtype=np.int32)
    timg[pixelist] = np.arange(1, len(pixelist) + 1)
    norm = np.bincount(qind)[1:]
    n = 0
    for i in tqdm(range(FD.beg, FD.end, sampling),
                  desc='Get ROI intensity of each frame'):
        (p, v) = FD.rdrawframe(i)
        w = np.where(timg[p])[0]
        pxlist = timg[p[w]] - 1
        mean_intensity[n] = np.bincount(qind[pxlist], weights=v[w],
                                        minlength=len(index) + 1)[1:]
        n += 1
    mean_intensity /= norm
    return mean_intensity, index

def _readImage(self):
    (p, v) = self._readImageRaw()
    # reconstruct the dense image from the sparse (pixel index, value) pairs
    img = np.zeros((self.md['ncols'], self.md['nrows']))
    np.put(np.ravel(img), p, v)
    return img

def init_compress_eigerdata(images, mask, md, filename,
                            bad_pixel_threshold=1e15,
                            hot_pixel_threshold=2**30,
                            bad_pixel_low_threshold=0,
                            nobytes=4, bins=1, with_pickle=True):
    '''Compress the Eiger data

    Create a new mask by removing hot pixels
    Do image average
    Do each image sum
    Find bad_frame_list for where the image sum is above bad_pixel_threshold
    Generate compressed data with filename
    if bins != 1, will bin the images with bin number as bins

    Header contains 1024 bytes:
        ['Magic value', 'beam_center_x', 'beam_center_y', 'count_time',
         'detector_distance', 'frame_time', 'incident_wavelength',
         'x_pixel_size', 'y_pixel_size',
         bytes per pixel (either 2 or 4 (Default)),
         Nrows, Ncols, Rows_Begin, Rows_End, Cols_Begin, Cols_End]

    Return
        mask
        avg_img
        imgsum
        bad_frame_list
    '''
    fp = open(filename, 'wb')
    # Make the 1024-byte header
    if bins != 1:
        nobytes = 8
    Header = struct.pack('@16s8d7I916x', b'Version-COMP0001',
                         md['beam_center_x'], md['beam_center_y'],
                         md['count_time'], md['detector_distance'],
                         md['frame_time'], md['incident_wavelength'],
                         md['x_pixel_size'], md['y_pixel_size'],
                         nobytes,
                         md['pixel_mask'].shape[1], md['pixel_mask'].shape[0],
                         0, md['pixel_mask'].shape[1],
                         0, md['pixel_mask'].shape[0])
    fp.write(Header)

    Nimg_ = len(images)
    avg_img = np.zeros_like(images[0], dtype=np.float64)
    Nopix = float(avg_img.size)
    n = 0
    good_count = 0
    frac = 0.0
    if nobytes == 2:
        dtype = np.int16
    elif nobytes == 4:
        dtype = np.int32
    elif nobytes == 8:
        dtype = np.float64
    else:
        print("Wrong nobytes value; only 2 [np.int16], 4 [np.int32] and"
              " 8 [np.float64] are supported, defaulting to 4 [np.int32]")
        dtype = np.int32

    Nimg = Nimg_ // bins
    time_edge = np.array(
        create_time_slice(N=Nimg_, slice_num=Nimg, slice_width=bins))
    imgsum = np.zeros(Nimg)
    if bins != 1:
        print('The frames will be binned by %s' % bins)

    for n in tqdm(range(Nimg)):
        t1, t2 = time_edge[n]
        img = np.average(images[t1:t2], axis=0)
        mask &= img < hot_pixel_threshold
        p = np.where((np.ravel(img) > 0) & np.ravel(mask))[0]  # don't use masked data
        v = np.ravel(np.array(img, dtype=dtype))[p]
        dlen = len(p)
        imgsum[n] = v.sum()
        if (imgsum[n] > bad_pixel_threshold) or \
                (imgsum[n] <= bad_pixel_low_threshold):
            dlen = 0
            fp.write(struct.pack('@I', dlen))
        else:
            np.ravel(avg_img)[p] += v
            good_count += 1
            frac += dlen / Nopix
            fp.write(struct.pack('@I', dlen))
            fp.write(struct.pack('@{}i'.format(dlen), *p))
            if bins == 1:
                # raw counts: 'i' (int32) for nobytes == 4, 'h' (int16) for 2
                fp.write(
                    struct.pack('@{}{}'.format(dlen, 'ih'[nobytes == 2]), *v))
            else:
                # binned frames are averaged, so values are stored as doubles
                fp.write(struct.pack('@{}d'.format(dlen), *v))
    fp.close()

    frac /= good_count
    print("The fraction of pixel occupied by photon is %6.3f%% " % (100 * frac))
    avg_img /= good_count
    bad_frame_list = np.where((np.array(imgsum) > bad_pixel_threshold) |
                              (np.array(imgsum) <= bad_pixel_low_threshold))[0]
    if len(bad_frame_list):
        print('Bad frame list is: %s' % bad_frame_list)
    else:
        print('No bad frames are involved.')
    if with_pickle:
        pkl.dump([mask, avg_img, imgsum, bad_frame_list],
                 open(filename + '.pkl', 'wb'))
    return mask, avg_img, imgsum, bad_frame_list

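# A minimal sketch of reading the 1024-byte header back, mirroring the
# struct layout written above (the file name is a placeholder):
#
#     import struct
#     with open('compressed.cmp', 'rb') as f:
#         fields = struct.unpack('@16s8d7I916x', f.read(1024))
#     magic = fields[0]                        # b'Version-COMP0001'
#     beam_center_x, beam_center_y = fields[1:3]
#     nobytes = fields[9]                      # bytes per pixel
#     dim1, dim2 = fields[10], fields[11]      # image dimensions (see header list)
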
def segment_compress_eigerdata(images, mask, md, filename,
                               bad_pixel_threshold=1e15,
                               hot_pixel_threshold=2**30,
                               bad_pixel_low_threshold=0,
                               nobytes=4, bins=1, N1=None, N2=None,
                               dtypes='images', reverse=True):
    '''Create compressed Eiger data without a header; this function is used
    for parallel compression.
    For parallel compression, don't pass any non-scalar parameters.
    '''
    if dtypes == 'uid':
        uid = md['uid']  # images
        detector = get_detector(db[uid])
        images = load_data(uid, detector, reverse=reverse)[N1:N2]
        print(N1, N2)

    Nimg_ = len(images)
    M, N = images[0].shape
    avg_img = np.zeros([M, N], dtype=np.float64)
    Nopix = float(avg_img.size)
    n = 0
    good_count = 0
    if nobytes == 2:
        dtype = np.int16
    elif nobytes == 4:
        dtype = np.int32
    elif nobytes == 8:
        dtype = np.float64
    else:
        print("Wrong nobytes value; only 2 [np.int16], 4 [np.int32] and"
              " 8 [np.float64] are supported, defaulting to 4 [np.int32]")
        dtype = np.int32

    Nimg = int(np.ceil(Nimg_ / bins))
    time_edge = np.array(
        create_time_slice(N=Nimg_, slice_num=Nimg, slice_width=bins))
    imgsum = np.zeros(Nimg)
    if bins != 1:
        print('The frames will be binned by %s' % bins)

    fp = open(filename, 'wb')
    for n in range(Nimg):
        t1, t2 = time_edge[n]
        if bins != 1:
            img = np.array(np.average(images[t1:t2], axis=0),
                           dtype=np.float64)
        else:
            img = np.array(images[t1], dtype=np.int32)
        mask &= img < hot_pixel_threshold
        p = np.where((np.ravel(img) > 0) & np.ravel(mask))[0]  # don't use masked data
        v = np.ravel(np.array(img, dtype=dtype))[p]
        dlen = len(p)
        imgsum[n] = v.sum()
        if (dlen == 0) or (imgsum[n] > bad_pixel_threshold) or \
                (imgsum[n] <= bad_pixel_low_threshold):
            dlen = 0
            fp.write(struct.pack('@I', dlen))
        else:
            np.ravel(avg_img)[p] += v
            good_count += 1
            fp.write(struct.pack('@I', dlen))
            fp.write(struct.pack('@{}i'.format(dlen), *p))
            if bins == 1:
                fp.write(
                    struct.pack('@{}{}'.format(dlen, 'ih'[nobytes == 2]), *v))
            else:
                # binned frames are averaged, so values are stored as doubles
                fp.write(struct.pack('@{}d'.format(dlen), *v))
        del p, v, img
        fp.flush()
    fp.close()

    avg_img /= good_count
    bad_frame_list = (np.array(imgsum) > bad_pixel_threshold) | \
                     (np.array(imgsum) <= bad_pixel_low_threshold)
    sys.stdout.write('#')
    sys.stdout.flush()
    return mask, avg_img, imgsum, bad_frame_list

def para_compress_eigerdata(images, mask, md, filename, num_sub=100,
                            bad_pixel_threshold=1e15,
                            hot_pixel_threshold=2**30,
                            bad_pixel_low_threshold=0,
                            nobytes=4, bins=1, dtypes='uid', reverse=True,
                            num_max_para_process=500, cpu_core_number=72,
                            with_pickle=True):
    if dtypes == 'uid':
        uid = md['uid']  # images
        detector = get_detector(db[uid])
        images_ = load_data(uid, detector, reverse=reverse)[:100]
        N = len(images_)
    else:
        N = len(images)

    N = int(np.ceil(N / bins))
    Nf = int(np.ceil(N / num_sub))
    if Nf > cpu_core_number:
        print("The process number is larger than %s (XF11ID server core"
              " number)" % cpu_core_number)
        num_sub_old = num_sub
        num_sub = int(np.ceil(N / cpu_core_number))
        Nf = int(np.ceil(N / num_sub))
        print("The sub compressed file number was changed from %s to %s"
              % (num_sub_old, num_sub))
    create_compress_header(md, filename + '-header', nobytes, bins)
    results = para_segment_compress_eigerdata(
        images=images, mask=mask, md=md, filename=filename, num_sub=num_sub,
        bad_pixel_threshold=bad_pixel_threshold,
        hot_pixel_threshold=hot_pixel_threshold,
        bad_pixel_low_threshold=bad_pixel_low_threshold,
        nobytes=nobytes, bins=bins, dtypes=dtypes,
        num_max_para_process=num_max_para_process)

    res_ = np.array([results[k].get() for k in list(sorted(results.keys()))])
    imgsum = np.zeros(N)
    bad_frame_list = np.zeros(N, dtype=bool)
    good_count = 0  # number of segments that contribute to avg_img
    for i in range(Nf):
        mask_, avg_img_, imgsum_, bad_frame_list_ = res_[i]
        imgsum[i * num_sub:(i + 1) * num_sub] = imgsum_
        bad_frame_list[i * num_sub:(i + 1) * num_sub] = bad_frame_list_
        if i == 0:
            mask = mask_
            avg_img = np.zeros_like(avg_img_)
        else:
            mask *= mask_
        if not np.sum(np.isnan(avg_img_)):
            avg_img += avg_img_
            good_count += 1

    bad_frame_list = np.where(bad_frame_list)[0]
    avg_img /= good_count
    if len(bad_frame_list):
        print('Bad frame list is: %s' % bad_frame_list)
    else:
        print('No bad frames are involved.')
    print('Combining the separated compressed files together...')
    combine_compressed(filename, Nf, del_old=True)
    del results
    del res_
    if with_pickle:
        pkl.dump([mask, avg_img, imgsum, bad_frame_list],
                 open(filename + '.pkl', 'wb'))
    return mask, avg_img, imgsum, bad_frame_list

def get_each_frame_intensityc(FD, sampling=1,
                              bad_pixel_threshold=1e10,
                              bad_pixel_low_threshold=0,
                              hot_pixel_threshold=2**30,
                              plot_=False, bad_frame_list=None, save=False,
                              *argv, **kwargs):
    '''Get the total intensity of each frame by sampling every N frames.
    Also get bad_frame_list by checking whether the sum is above
    bad_pixel_threshold.

    Usage:
        imgsum, bad_frame_list = get_each_frame_intensityc(
            good_series, sampling=1000, bad_pixel_threshold=1e10, plot_=True)
    '''
    imgsum = np.zeros(int((FD.end - FD.beg) / sampling))
    n = 0
    for i in tqdm(range(FD.beg, FD.end, sampling),
                  desc='Get each frame intensity'):
        (p, v) = FD.rdrawframe(i)
        if len(p) > 0:
            imgsum[n] = np.sum(v)
        n += 1
    if plot_:
        uid = 'uid'
        if 'uid' in kwargs.keys():
            uid = kwargs['uid']
        fig, ax = plt.subplots()
        ax.plot(imgsum, 'bo')
        ax.set_title('uid= %s--imgsum' % uid)
        ax.set_xlabel('Frame_bin_%s' % sampling)
        ax.set_ylabel('Total_Intensity')
        if save:
            path = kwargs['path']
            if 'uid' in kwargs:
                uid = kwargs['uid']
            else:
                uid = 'uid'
            fp = path + "uid=%s--imgsum-" % uid + '.png'
            fig.savefig(fp, dpi=fig.dpi)
        plt.show()
    bad_frame_list_ = np.where((np.array(imgsum) > bad_pixel_threshold) |
                               (np.array(imgsum) <= bad_pixel_low_threshold)
                               )[0] + FD.beg
    if bad_frame_list is not None:
        bad_frame_list = np.unique(
            np.concatenate([bad_frame_list, bad_frame_list_]))
    else:
        bad_frame_list = bad_frame_list_
    if len(bad_frame_list):
        print('Bad frame list length is: %s' % len(bad_frame_list))
    else:
        print('No bad frames are involved.')
    return imgsum, bad_frame_list

def mean_intensityc(FD, labeled_array, sampling=1, index=None,
                    multi_cor=False):
    """Compute the mean intensity for each ROI in the compressed file (FD),
    with optional parallel computation

    Parameters
    ----------
    FD: Multifile class
        compressed file
    labeled_array : array
        labeled array; 0 is background.
        Each ROI is represented by a nonzero integer. It is not required that
        the ROI labels are contiguous
    index : int, list, optional
        The ROI's to use. If None, this function will extract averages for all
        ROIs

    Returns
    -------
    mean_intensity : array
        The mean intensity of each ROI for all `images`
        Dimensions:
            len(mean_intensity) == number of sampled frames
            len(mean_intensity[0]) == len(index)
    index : list
        The labels for each element of the `mean_intensity` list
    """
    qind, pixelist = roi.extract_label_indices(labeled_array)
    sx, sy = (FD.rdframe(FD.beg)).shape
    if labeled_array.shape != (sx, sy):
        raise ValueError(
            " `image` shape (%d, %d) in FD is not equal to the labeled_array"
            " shape (%d, %d)" % (sx, sy, labeled_array.shape[0],
                                 labeled_array.shape[1]))
    # handle various input for `index`
    if index is None:
        index = list(np.unique(labeled_array))
        index.remove(0)
    else:
        try:
            len(index)
        except TypeError:
            index = [index]
    index = np.array(index)
    # keep only the pixels belonging to the requested ROI labels and
    # renumber those labels to 1..len(index)
    good_ind = np.zeros(max(qind), dtype=np.int32)
    good_ind[index - 1] = np.arange(len(index)) + 1
    w = np.where(good_ind[qind - 1])[0]
    qind = good_ind[qind[w] - 1]
    pixelist = pixelist[w]
    # pre-allocate an array for performance
    mean_intensity = np.zeros([int((FD.end - FD.beg) / sampling), len(index)])
    # reverse lookup: flat detector index -> 1-based position in pixelist
    timg = np.zeros(FD.md['ncols'] * FD.md['nrows'], dtype=np.int32)
    timg[pixelist] = np.arange(1, len(pixelist) + 1)
    norm = np.bincount(qind)[1:]
    n = 0
    if not multi_cor:
        for i in tqdm(range(FD.beg, FD.end, sampling),
                      desc='Get ROI intensity of each frame'):
            (p, v) = FD.rdrawframe(i)
            w = np.where(timg[p])[0]
            pxlist = timg[p[w]] - 1
            mean_intensity[n] = np.bincount(qind[pxlist], weights=v[w],
                                            minlength=len(index) + 1)[1:]
            n += 1
    else:
        # one worker per ROI label
        ring_masks = [np.array(labeled_array == i, dtype=np.int64)
                      for i in np.unique(labeled_array)[1:]]
        inputs = range(len(ring_masks))
        go_through_FD(FD)
        pool = Pool(processes=len(inputs))
        print('Starting assigning the tasks...')
        results = {}
        for i in tqdm(inputs):
            results[i] = apply_async(pool, _get_mean_intensity_one_q,
                                     (FD, sampling, ring_masks[i]))
        pool.close()
        print('Starting running the tasks...')
        res = [results[k].get() for k in tqdm(list(sorted(results.keys())))]
        for i in inputs:
            mean_intensity[:, i] = res[i]
        print('ROI mean_intensity calculation is DONE!')
        del results
        del res
    mean_intensity /= norm
    return mean_intensity, index

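# A minimal usage sketch (illustrative names; `FD` is an open Multifile and
# `roi_mask` a labeled ROI array):
#
#     mean_int, index = mean_intensityc(FD, roi_mask, sampling=1)
#     # mean_int.shape == (number of sampled frames, number of ROIs)
#     # multi_cor=True distributes the work with one process per ROI
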