def get_dimension_only(imagefile):
    """Return the series count and the T/Z/C/Y/X sizes of an image file.

    The pixel sizes are read from the OME-XML ``Image`` element selected by
    the module-level ``IMAGEID``; the series count comes from the underlying
    Bio-Formats reader.  A one-line summary is printed as a side effect.

    :param imagefile: path of the image file to inspect
    :return: list ``[totalseries, SizeT, SizeZ, SizeC, SizeY, SizeX]``
    """
    # get OME-XML and change the encoding to UTF-8
    omexml = bioformats.get_omexml_metadata(imagefile)
    new_omexml = omexml.encode('utf-8')

    rdr = bioformats.get_image_reader(None, path=imagefile)
    # read total number of image series
    totalseries = rdr.rdr.getSeriesCount()

    # get dimensions for CTZXY
    md = bioformats.OMEXML(new_omexml)
    pixels = md.image(IMAGEID).Pixels
    SizeC = pixels.SizeC
    SizeT = pixels.SizeT
    SizeZ = pixels.SizeZ
    SizeX = pixels.SizeX
    SizeY = pixels.SizeY

    # Join the parts into one string before printing: a single-argument
    # print(...) emits the same text on Python 2 and Python 3, whereas the
    # former print statement was a SyntaxError on Python 3.
    report = ['Series: ', totalseries, 'T: ', SizeT, 'Z: ', SizeZ,
              'C: ', SizeC, 'X: ', SizeX, 'Y: ', SizeY]
    print(' '.join(str(part) for part in report))

    # usually the x-axis of an image is from left --> right and y from top --> bottom
    # in order to be compatible with numpy arrays XY are switched:
    # Y (rows, top --> bottom) is listed before X (columns)
    sizes = [totalseries, SizeT, SizeZ, SizeC, SizeY, SizeX]

    return sizes
def get_czi_metadata(infile):
    """
    Collect per-series size information from a CZI file.

    The OME-XML is produced by running the Bio-Formats command line tool
    ``showinf`` and parsed with ``bioformats.OMEXML``.

    :param infile: file location of the CZI file
    :return: dictionary with one ``{'width', 'height', 'channels'}`` entry per
        series index, plus ``channel_<i>_name`` entries taken from series 0
    """
    showinf_cmd = [
        '/usr/local/share/bftools/showinf',
        '-nopix',
        '-omexml-only',
        infile,
    ]
    raw_xml = subprocess.check_output(showinf_cmd).decode('utf-8')
    ome = bioformats.OMEXML(raw_xml)

    # Series #0 should be the first tissue sample at full resolution.
    # Series #1 tends to be this same tissue sample at half the resolution.
    # This continues halving resolution 5-6 times in succession. Every series
    # is recorded here; choosing the full resolution ones is left to
    # get_fullres_series_indices.
    metadata_dict = {}
    for series_idx in range(ome.image_count):
        series_pixels = ome.image(series_idx).Pixels
        metadata_dict[series_idx] = {
            'width': series_pixels.SizeX,
            'height': series_pixels.SizeY,
            'channels': series_pixels.channel_count,
        }

    # Channel names are read from the first series only.
    first_pixels = ome.image(0).Pixels
    for channel_idx in range(first_pixels.channel_count):
        channel_key = f'channel_{channel_idx}_name'
        metadata_dict[channel_key] = first_pixels.channel(channel_idx).Name

    return metadata_dict
def readZPositions(stk_files):
    """Read the z-position and time point of every stk file.

    Starts the Java VM, reads the ``PositionZ`` of the first plane of each
    file from its OME-XML metadata, and kills the VM again (also when reading
    fails).  The time point is parsed from the file name: the last four
    characters are dropped and the text after the last ``'t'`` is converted
    to ``int``.

    :param stk_files: list with the paths of the stk files
    :return: tuple ``(zs, ts, z_offsets)`` of numpy arrays, where
        ``z_offsets`` is ``zs`` minus its minimum
    """
    print('\n\t\treading z-values... ')
    javabridge.start_vm(class_path=bioformats.JARS)

    zs = []  # z-positions
    ts = []  # time points (taken from the file names)
    try:
        print('starting loop...')
        for file_path in stk_files:
            print('reading file: {}'.format(file_path))
            md = bioformats.get_omexml_metadata(file_path)
            ome = bioformats.OMEXML(md)

            # first plane of the first image carries the z-position
            zp = ome.image().Pixels.Plane().get_PositionZ()
            time_point = int(file_path[:-4].split('t')[-1])

            zs.append(zp)
            ts.append(time_point)
    finally:
        # Always shut the VM down, even if a file could not be read.
        javabridge.kill_vm()

    zs_arr = np.array(zs)
    if zs_arr.size:
        # One vectorised subtraction instead of recomputing the minimum
        # for every element.
        z_offsets = zs_arr - zs_arr.min()
    else:
        z_offsets = np.array([])

    return zs_arr, np.array(ts), z_offsets
def save_images(self):
    """Saves the individual images as a npy file

    2D might have more acquisitions +/- focal plane, (usually 3 images).
    focal_plane_idx corresponds to the plane to consider. Mid-plane is the
    one in focus and the +/- on either side would be blurred. For 2D
    acquisitions, this is stored along the Z dimension. How is this handled
    for 3D acquisitions?

    Writes one directory per timepoint and channel under ``self.split_dir``
    and a ``split_images_info.csv`` table describing every record returned
    by ``self.save_each_image``.  The Java VM is started here and killed
    before returning, also when an error occurs.

    :raises FileNotFoundError: if ``self.lif_fname`` does not exist
    """
    if not os.path.exists(self.lif_fname):
        raise FileNotFoundError("LIF file doesn't exist at:", self.lif_fname)
    os.makedirs(self.split_dir, exist_ok=True)

    jv.start_vm(class_path=bf.JARS, max_heap_size='8G')
    try:
        metadata = bf.get_omexml_metadata(self.lif_fname)
        omexml_object = bf.OMEXML(metadata)
        pixels = omexml_object.image().Pixels
        num_channels = pixels.channel_count
        num_timepoints = pixels.SizeT
        num_pix_z = pixels.SizeZ
        size_x_um = pixels.PhysicalSizeX
        size_y_um = pixels.PhysicalSizeY
        size_z_um = pixels.PhysicalSizeZ

        records = []
        reader = bf.ImageReader(self.lif_fname, perform_init=True)
        try:
            for timepoint_idx in range(num_timepoints):
                timepoint_dir = os.path.join(
                    self.split_dir, 'timepoint_{}'.format(timepoint_idx))
                os.makedirs(timepoint_dir, exist_ok=True)

                for channel_idx in range(num_channels):
                    channel_dir = os.path.join(
                        timepoint_dir, 'channel_{}'.format(channel_idx))
                    os.makedirs(channel_dir, exist_ok=True)

                    # NOTE(review): the sample range is hard-coded; the
                    # original comment suggests it was meant to be
                    # range(omexml_object.get_image_count()) - confirm.
                    for sample_idx in range(15, 412):
                        cur_records = self.save_each_image(
                            reader, num_pix_z, channel_dir, timepoint_idx,
                            channel_idx, sample_idx,
                            size_x_um, size_y_um, size_z_um)
                        records.extend(cur_records)
                    msg = 'Wrote files for tp:{}, channel:{}'.format(
                        timepoint_idx, channel_idx)
                    self._log_info(msg)
        finally:
            # Release the Bio-Formats reader even if saving fails.
            reader.close()

        df = pd.DataFrame.from_records(records, columns=[
            'timepoint', 'channel_num', 'sample_num', 'slice_num', 'fname',
            'size_x_microns', 'size_y_microns', 'size_z_microns'
        ])
        metadata_fname = os.path.join(self.split_dir,
                                      'split_images_info.csv')
        df.to_csv(metadata_fname, sep=',')
    finally:
        # The VM must not outlive this call, whatever happened above.
        jv.kill_vm()
def extract_meta_bioformats(filepath, metadata=None):
    """Add Z/T sizes and the X pixel scale of an image file to a dict.

    :param filepath: path of the image file to inspect
    :param metadata: optional dict to update in place; a new dict is created
        when omitted (a ``dict()`` default would be shared between calls)
    :return: ``metadata`` with the keys ``'size_Z'``, ``'size_T'`` and
        ``'scale'`` (the ``PhysicalSizeX`` of the first image) set
    """
    if metadata is None:
        metadata = {}
    omexmlstr = bioformats.get_omexml_metadata(filepath)
    o = bioformats.OMEXML(omexmlstr)
    x = o.image().Pixels
    metadata['size_Z'] = x.SizeZ
    metadata['size_T'] = x.SizeT
    metadata['scale'] = x.PhysicalSizeX
    return metadata
def getImageInfo(filename):
    """Return ``(SizeT, SizeX, SizeY)`` of the first image in ``filename``."""
    first_pixels = bf.OMEXML(bf.get_omexml_metadata(filename)).image().Pixels
    return first_pixels.SizeT, first_pixels.SizeX, first_pixels.SizeY
def getImageShape(path):
    """Return ``(SizeX, SizeY, SizeZ, SizeC, SizeT)`` of the first image."""
    ome = bf.OMEXML(bf.get_omexml_metadata(path=path))
    px = ome.image().Pixels
    shape = (px.SizeX, px.SizeY, px.SizeZ, px.SizeC, px.SizeT)
    return shape
def get_editable_omexml(path):
    """
    Build an editable ``bioformats.OMEXML`` object from a file's metadata.

    Used to parse metadata from .nd2 file and pass on to .ome.tiff file.
    """
    return bioformats.OMEXML(bioformats.get_omexml_metadata(path))
def get_movie_shape(path):
    """Return ``(SizeX, SizeY, SizeZ, SizeC, SizeT)`` of the first image."""
    pixels = bf.OMEXML(bf.get_omexml_metadata(path=path)).image().Pixels
    dimension_names = ('SizeX', 'SizeY', 'SizeZ', 'SizeC', 'SizeT')
    return tuple(getattr(pixels, dim) for dim in dimension_names)
def get_metadata_store(imagefile):
    """Return the OME-XML metadata of ``imagefile`` as a ``bioformats.OMEXML``.

    Starts the JVM when ``VM_STARTED`` is false and calls ``jvm_error()``
    when ``VM_KILLED`` is set.  The OME-XML is encoded to UTF-8 before it is
    parsed.
    """
    if not VM_STARTED:
        start_jvm()
    if VM_KILLED:
        jvm_error()

    utf8_xml = bioformats.get_omexml_metadata(imagefile).encode('utf-8')
    return bioformats.OMEXML(utf8_xml)
def get_metadata_store(imagefile):
    """Return the OME-XML metadata of ``imagefile`` as a ``bioformats.OMEXML``.

    Starts the JVM when ``VM_STARTED`` is false and calls ``jvm_error()``
    when ``VM_KILLED`` is set.  The XML itself is obtained from
    ``get_OMEXML``.
    """
    if not VM_STARTED:
        start_jvm()
    if VM_KILLED:
        jvm_error()

    # fetch the OME-XML, then wrap it in the metadata object
    return bioformats.OMEXML(get_OMEXML(imagefile))
def showGrahp(filename, num):
    """Print the Z/C/T/X/Y pixel sizes and display time point ``num``.

    :param filename: path of the image file
    :param num: index of the time point to read and show
    """
    first_pixels = bf.OMEXML(bf.get_omexml_metadata(filename)).image().Pixels

    for axis in ('Z', 'C', 'T', 'X', 'Y'):
        print('pixels in ' + axis + ': '
              + str(getattr(first_pixels, 'Size' + axis)))

    with bf.ImageReader(filename) as reader:
        frame = reader.read(t=num)
        plt.imshow(frame, cmap=plt.cm.binary)
        plt.show()
    reader.close()
def readfile(filename):
    """Read every (z, t) plane of an image file into one 4-D array.

    :param filename: path of the image file
    :return: float array of shape ``(SizeY, SizeX, SizeZ, SizeT)`` holding
        the unrescaled pixel values
    """
    # read metadata
    metadata = bioformats.get_omexml_metadata(filename)
    pixels = bioformats.OMEXML(metadata).image().Pixels
    nx, ny, nz, nt = pixels.SizeX, pixels.SizeY, pixels.SizeZ, pixels.SizeT

    # ImageReader.read returns planes as (rows, columns) == (SizeY, SizeX),
    # so the buffer is allocated rows-first; an (nx, ny) buffer only worked
    # for square images.
    image4d = np.zeros(shape=(ny, nx, nz, nt))

    # read image data; the context manager closes the reader afterwards
    with bioformats.ImageReader(filename) as reader:
        for t in range(nt):
            for z in range(nz):
                image4d[:, :, z, t] = reader.read(z=z, t=t, rescale=False)
    return image4d
def read(self, microscope_metadata_files, microscope_image_files):
    '''Builds an OMEXML object from the bare XML declaration; the file
    arguments are not used.

    Parameters
    ----------
    microscope_metadata_files: List[str]
        absolute path to the microscope metadata files
    microscope_image_files: List[str]
        absolute path to the microscope image files

    Returns
    -------
    bioformats.omexml.OMEXML
        OMEXML image metadata
    '''
    empty_omexml = bioformats.OMEXML(XML_DECLARATION)
    return empty_omexml
def read_metadata(self):
    """Load the OME-XML of ``self.path`` and record the channel names.

    Sets ``self.channel_count`` and ``self.channel_names`` from the first
    image, prints the parsed OMEXML object, and then dumps the XML tree to
    stdout (up to six levels deep) as a debugging aid.
    """
    omexmlstr = bioformats.get_omexml_metadata(self.path)
    omexml = bioformats.OMEXML(omexmlstr)
    # orgmetadata = bioformats.OMEXML.OriginalMetadata(omexml)
    root = ET.fromstring(omexmlstr)
    print(omexml)
    # NOTE(review): `tree` is not used anywhere below in this method.
    tree = ET.ElementTree(root)
    self.channel_count = omexml.image().Pixels.channel_count
    self.channel_names = []
    # populate channel_names
    for i in np.arange(0, self.channel_count, 1):
        self.channel_names.append(omexml.image().Pixels.Channel(i).Name)
    # print('image_count:',omexml.get_image_count())
    # print('image physical sizex unit',omexml.image().Pixels.get_PhysicalSizeXUnit())
    # print('image physical sizex',omexml.image().Pixels.get_PhysicalSizeX())
    # print('image sizex',omexml.image().Pixels.get_SizeX())
    # print('image physical sizey unit',omexml.image().Pixels.get_PhysicalSizeYUnit())
    # print('image physical sizey',omexml.image().Pixels.get_PhysicalSizeY())
    # print('image sizey',omexml.image().Pixels.get_SizeY())
    # print('image sizez',omexml.image().Pixels.get_SizeZ())
    # print('image sixec',omexml.image().Pixels.get_SizeC())
    # print('image sizet',omexml.image().Pixels.get_SizeT())
    # print('image dimensions',omexml.image().Pixels.get_DimensionOrder())

    # Namespace prefix used to recognise <Plane> elements below.
    tagprefix = '{http://www.openmicroscopy.org/Schemas/OME/2016-06}'
    # NOTE(review): the nesting below was reconstructed from whitespace-
    # collapsed source - confirm against the original file.
    for child1 in root:
        print('Child1: ', child1.tag, '\t', child1.attrib)
        for child2 in child1:
            print('Child2: ', child2.tag, '\t', child2.attrib)
            # count limits how many Plane elements are dumped per child2
            count = 0
            for child3 in child2:
                print('Child3: ', child3.tag, child3.attrib)
                if (count >= 10):
                    break
                if (child3.tag == tagprefix + 'Plane'):
                    count = count + 1
                for child4 in child3:
                    print('Child4: ', child4.tag, child4.attrib)
                    for child5 in child4:
                        print('Child5: ', child5.tag, child5.attrib)
                        for child6 in child5:
                            print('Child6: ', child6.tag, child6.attrib)
            print('-----------------------------------')
        print('====================================')
def load_slide_scanner_image(self, z=0, serie=4):
    """Load one plane of a slide-scanner series and announce its size.

    Resets ``self.downfactor`` to 1, stores the plane read with
    ``bf.load_image`` in ``self._img_vsi``, emits ``n_slices_known`` and then
    calls ``self.slice_slide_scanner()``.

    :param z: z index handed to ``bf.load_image``
    :param serie: series index handed to ``bf.load_image``
    """
    self.downfactor = 1
    image_path = self.path.as_posix()
    self._img_vsi = bf.load_image(image_path, t=0, series=serie, z=z)

    # The number of Image elements in the OME-XML is reported as the
    # slice count.
    ome = bf.OMEXML(bf.get_omexml_metadata(self.path.as_posix()))
    n_zslices = ome.image_count
    last_axis_size = self._img_vsi.shape[-1]
    self.n_slices_known.emit(n_zslices, last_axis_size, 4, 1)
    self.slice_slide_scanner()
def __init__(self, filename):
    """Index the series of ``filename`` whose name ends with ``'x'``.

    For every image that has a ``PhysicalSizeX`` and a name ending in
    ``'x'``, the series index, its ``(SizeX, SizeY)`` and the integer parsed
    from the name without its last character are appended to
    ``self.indexes``, ``self.sizes`` and ``self.mags``.  An ``ImageReader``
    for the file is stored in ``self.reader``.
    """
    self.reader: bioformats.ImageReader
    self.indexes = []
    self.sizes = []
    self.mags = []

    self.meta = bioformats.OMEXML(bioformats.get_omexml_metadata(filename))

    # get magnification
    #meta.Objective()...
    for series_idx in range(self.meta.get_image_count()):
        series = self.meta.image(series_idx)
        series_pixels = series.Pixels
        label = series.Name
        if series_pixels.PhysicalSizeX is None or not label.endswith('x'):
            continue
        self.indexes.append(series_idx)
        self.sizes.append((series_pixels.SizeX, series_pixels.SizeY))
        self.mags.append(int(label[0:-1]))

    self.reader = bioformats.ImageReader(filename)
def get_metadata(self, save=True):
    """Collect per-series metadata into ``self.md`` and create series folders.

    One row per OME-XML ``Image`` element of ``self.fullpath`` is stored in
    the DataFrame ``self.md``.  With ``save`` set, the table is written as
    ``parsed_metadata.pickle`` and ``parsed_metadata.csv`` under
    ``<dpath>/<date>/<project>``.  A sub-folder ``S<nn>_<Name>`` is created
    for every series that does not have one yet.

    :param save: write the metadata table to disk when true
    """
    # Retrieve metadata info, put it in data frame and save it.
    print('Getting and saving metadata for ' + self.fullpath + '...')
    ome = bf.OMEXML(bf.get_omexml_metadata(self.fullpath))
    md = []
    for ind in range(ome.image_count):
        iome = ome.image(ind)
        md.append([
            iome.get_ID(),
            iome.get_Name(),
            iome.Pixels.get_SizeC(),
            iome.Pixels.get_SizeX(),
            iome.Pixels.get_PhysicalSizeX(),
            iome.Pixels.get_PhysicalSizeXUnit(),
            iome.Pixels.get_SizeY(),
            iome.Pixels.get_PhysicalSizeY(),
            iome.Pixels.get_PhysicalSizeYUnit(),
            iome.Pixels.get_SizeZ(),
            iome.Pixels.get_PhysicalSizeZ(),
            iome.Pixels.get_PhysicalSizeZUnit(),
            iome.Pixels.get_PixelType()
        ])
    self.md = pd.DataFrame(md)
    # Column order must match the order of the values appended above.
    self.md.columns = [
        'ID', 'Name', 'Nchan', 'SizeX', 'PhysicalSizeX', 'PhysicalSizeXUnit',
        'SizeY', 'PhysicalSizeY', 'PhysicalSizeYUnit', 'SizeZ',
        'PhysicalSizeZ', 'PhysicalSizeZUnit', 'PixelType'
    ]
    if save:
        print('Saving metadata...')
        fname = os.path.join(self.dpath, self.date, self.project,
                             'parsed_metadata')
        with open(fname + '.pickle', 'wb') as f:
            pickle.dump(self.md, f)
        self.md.to_csv(fname + '.csv', index=False)
    # Create one folder per series
    # NOTE(review): assumed to run regardless of `save`; the source
    # indentation was lost - confirm.
    project_path = os.path.join(self.dpath, self.date, self.project)
    for i in range(ome.image_count):
        # Folder names are 1-based and zero-padded to two digits.
        folder_name = 'S{:0>2d}'.format(i + 1) + '_' + self.md.Name[i]
        # Useful ref: https://mkaz.blog/code/python-string-format-cookbook/
        series_folder = os.path.join(project_path, folder_name)
        if not os.path.exists(series_folder):
            print('Creating subfolder ' + series_folder + '...')
            os.makedirs(series_folder)
def read_metadata(self,update=False):
    """Return the OME-XML metadata of ``self._file_path``, cached after the
    first read.

    :param update: re-read the file even if metadata is already cached
    :return: ``bioformats.OMEXML`` built from the reader's metadata store
    """
    if self._metadata and not update:
        return self._metadata

    # Files ending in '.ome.tif' are opened with the dedicated OMETiffReader;
    # every other file goes through the generic ImageReader.
    if self._file_path.endswith('.ome.tif'):
        rdr = jutil.JClassWrapper('loci.formats.in.OMETiffReader')()
    else:
        rdr = jutil.JClassWrapper('loci.formats.ImageReader')()

    rdr.setOriginalMetadataPopulated(True)

    # Attach a fresh OME-XML metadata store so that setId() fills it in.
    clsOMEXMLService = jutil.JClassWrapper('loci.formats.services.OMEXMLService')
    serviceFactory = jutil.JClassWrapper('loci.common.services.ServiceFactory')()
    service = serviceFactory.getInstance(clsOMEXMLService.klass)
    omexml = service.createOMEXMLMetadata()

    rdr.setMetadataStore(omexml)
    try:
        rdr.setId(self._file_path)
        self._metadata = bioformats.OMEXML(omexml.dumpXML())
    finally:
        # The reader is only needed to populate the store; close it so the
        # file handle is not leaked.
        rdr.close()

    return self._metadata
def read_stack(file_path, axis='T'):
    """Read all planes along one axis of an image file into a 3-D array.

    :param file_path: path of the image file
    :param axis: ``'Z'``, ``'C'`` or ``'T'`` (case-insensitive); the axis to
        iterate over
    :return: ``uint16`` array of shape ``(size_along_axis, SizeY, SizeX)``
    :raises KeyError: if ``axis`` is not one of Z, C or T
    """
    pixels = bf.OMEXML(bf.get_omexml_metadata(file_path)).image().Pixels
    sizeZ = pixels.SizeZ
    sizeC = pixels.SizeC
    sizeT = pixels.SizeT
    sizeX = pixels.SizeX
    sizeY = pixels.SizeY

    axis_key = axis.lower()
    axes = {'z': sizeZ, 'c': sizeC, 't': sizeT}
    ax3 = axes[axis_key]

    # ImageReader.read returns planes as (rows, columns) == (SizeY, SizeX);
    # an (ax3, sizeX, sizeY) buffer only worked for square images.
    out_arr = np.zeros((ax3, sizeY, sizeX), dtype='uint16')

    with bf.ImageReader(file_path) as r:
        for i in range(ax3):
            kwargs = {axis_key: i}
            out_arr[i] = r.read(rescale=False, **kwargs)
    return out_arr
def getMax(filename):
    """Find the time point with the highest thresholded mean intensity.

    For each time point, pixels at or above 1/15 of the frame maximum are
    averaged.

    :param filename: path of the image file
    :return: tuple ``(best_mean, index_of_best_time_point)``
    """
    ome = bf.OMEXML(bf.get_omexml_metadata(filename))
    n_frames = ome.image().Pixels.SizeT

    frame_means = []
    with bf.ImageReader(filename) as reader:
        for t_idx in range(n_frames):
            frame = reader.read(t=t_idx)
            peak = np.max(frame)
            bright = frame >= peak / 15
            kept = frame * bright
            frame_means.append(sum(sum(kept)) / sum(sum(bright)))
    reader.close()

    best_mean = max(frame_means)
    return best_mean, frame_means.index(best_mean)
def im_compile(filename, path=None, rescale=False):
    """
    Converts image stack to a numpy nd array using python-bioformats.

    Parses the metadata of an image file and constructs a numpy ndarray of
    the images based on the number of channels, z-planes, and timepoints.

    Args:
        filename: Filename of the image to parse.
        path: The directory containing the image file.  When ``None``,
            ``filename`` is used as the full path.  Any value accepted by
            ``os.path.join`` works.
        rescale: Forwarded to ``ImageReader.read`` for every plane.

    Returns:
        An ndarray of the images in the order YXCZT, cast to the pixel type
        reported by the metadata.
    """
    import bioformats as bf
    import numpy as np
    import os

    # Join whenever a directory is given; restricting this to `str` left
    # `fullfile` unbound for other path types.
    if path is None:
        fullfile = filename
    else:
        fullfile = os.path.join(path, filename)

    metadata = bf.get_omexml_metadata(fullfile)
    ome_data = bf.OMEXML(xml=metadata)

    # Look the Pixels element up once instead of on every loop iteration.
    pixels = ome_data.image().Pixels
    size_y = pixels.SizeY
    size_x = pixels.SizeX
    n_channels = pixels.channel_count
    size_z = pixels.SizeZ
    size_t = pixels.SizeT

    mat = np.zeros([size_y, size_x, n_channels, size_z, size_t])

    with bf.ImageReader(fullfile) as rdr:
        for c_cnt in range(n_channels):
            for t_cnt in range(size_t):
                for z_cnt in range(size_z):
                    image = rdr.read(c=c_cnt, t=t_cnt, z=z_cnt,
                                     rescale=rescale)
                    mat[:, :, c_cnt, z_cnt, t_cnt] = image

    return mat.astype(pixels.PixelType)
def czi2tif(nameczi, nametif, slice):
    """Convert one series of a CZI file into a tiled, LZW-compressed BigTIFF.

    The series is read in horizontal strips of roughly a quarter of the
    image height and written to a 3-band byte GeoTIFF; overviews are built
    afterwards.

    :param nameczi: path of the input CZI file
    :param nametif: path of the GeoTIFF to create
    :param slice: index of the series (OME-XML image) to convert
    """
    image_info = bioformats.get_omexml_metadata(nameczi)
    omxml_data = bioformats.OMEXML(image_info)
    pixels = omxml_data.image(slice).Pixels
    sx = pixels.SizeX
    sy = pixels.SizeY

    dst_ds = gdal.GetDriverByName('GTiff').Create(
        nametif, sx, sy, 3, gdal.GDT_Byte,
        ['COMPRESS=LZW', 'BIGTIFF=YES', 'TILED=YES'])
    bands = [dst_ds.GetRasterBand(band_no) for band_no in (1, 2, 3)]

    # At least one row per strip: a zero step would make range() raise.
    step = max(1, int(np.round(sy / 4)))

    with bioformats.formatreader.ImageReader(nameczi) as reader:
        for k in range(0, sy, step):
            # Clamp the final strip to the rows that are left.  The previous
            # clamp subtracted one row too many, so the last image row was
            # never written.
            rows = min(step, sy - k)
            # No positional argument here: the first positional parameter of
            # ImageReader.read is the channel index `c`, not a file name.
            data = reader.read(z=0, t=0, series=slice, rescale=False,
                               XYWH=(0, k, sx, rows))
            data = np.array(np.reshape(data, (rows, -1, 3)))
            for band_idx, band in enumerate(bands):
                band.WriteArray(data[:, :, band_idx], 0, k)

    dst_ds.BuildOverviews("NEAREST", [2, 4, 8, 16, 32, 64])
    dst_ds.FlushCache()  # write to disk
    dst_ds = None
def _read_metadata(self):
    """Populate size, dtype, channel and sampling-rate attributes from OME-XML.

    Walks the OME-XML tree of ``self.file_path``, takes the attributes of
    the ``Pixels`` element of the ``Image`` whose ``Name`` contains
    ``'Primary'``, and stores them on ``self``.  The sampling frequency is
    taken from the text of the element that follows one whose text contains
    ``'frame'``.
    """
    ## Read metadata from OME-XML
    metadata = bioformats.get_omexml_metadata(str(self.file_path))
    o = bioformats.OMEXML(metadata)
    dom = o.dom
    self._root = dom.getroot()

    # NOTE(review): the nesting below was reconstructed from whitespace-
    # collapsed source - confirm against the original file.
    for ch in self._root:
        # Strip the '{namespace}' prefix from the tag.
        tag = ch.tag[ch.tag.find('}') + 1:]
        if 'Image' in tag:
            attrib = ch.attrib
            if 'Primary' in attrib['Name']:
                for ch1 in ch:
                    tag1 = ch1.tag[ch1.tag.find('}') + 1:]
                    attrib1 = ch1.attrib
                    if 'Pixels' in tag1:
                        pixels_info = attrib1

    # NOTE(review): `pixels_info` is only bound if a matching Pixels element
    # was found above.
    self._num_channels = int(pixels_info['SizeC'])
    self._num_frames = int(pixels_info['SizeT'])
    self._size_x = int(pixels_info['SizeX'])
    self._size_y = int(pixels_info['SizeY'])
    self._size_z = int(pixels_info['SizeZ'])
    self._dtype = pixels_info['Type']
    self._pixelinfo = pixels_info

    # TODO retrieve channel names
    self._channel_names = [f'channel_{i}' for i in range(self._num_channels)]

    # TODO find better approach
    # The value is read from the text of the element that comes right after
    # an element whose text contains 'frame'.
    get_next = False
    for it in self._root.iter():
        if it.text is not None:
            if get_next:
                self._sampling_frequency = float(it.text)
                get_next = False
            if 'frame' in it.text:
                get_next = True
def readBioFormatsMeta(fn):
    """Reads meta data out of bioformats format.

    .. note:: On Python 2 this changes the system default encoding to UTF8.
       Python 3 has no such setting, so nothing is changed there.

    Args:
        fn (str): Path to file.

    Returns:
        OMEXML: meta data of all data.

    """
    # Change system encoding to UTF 8.  ``reload(sys)`` and
    # ``sys.setdefaultencoding`` only exist on Python 2, so guard them.
    if sys.version_info[0] < 3:
        reload(sys)
        sys.setdefaultencoding('UTF8')

    # Load and convert to utf8
    meta = bioformats.get_omexml_metadata(path=fn)
    if isinstance(meta, bytes):
        # Python 2 ``str`` (or raw bytes): decode to text first.
        meta = meta.decode('utf-8')
    meta = meta.encode('utf-8')

    meta2 = bioformats.OMEXML(meta)

    return meta2
def genGraphSingle(filename):
    """Plot the thresholded mean intensity of every time point.

    For each time point, pixels at or above 1/15 of the frame maximum are
    averaged; the means are plotted against ``index * 250 / 1000``.

    :param filename: path of the image file
    """
    ome = bf.OMEXML(bf.get_omexml_metadata(filename))
    n_frames = ome.image().Pixels.SizeT

    pZ = 250
    positions = [idx * pZ / 1000 for idx in range(n_frames)]

    frame_means = []
    with bf.ImageReader(filename) as reader:
        for t_idx in range(n_frames):
            frame = reader.read(t=t_idx)
            peak = np.max(frame)
            bright = frame >= peak / 15
            kept = frame * bright
            frame_means.append(sum(sum(kept)) / sum(sum(bright)))
    reader.close()

    plt.plot(positions, frame_means)
    plt.show()
# Example script: start the Java VM and print basic OME-XML metadata of a
# TIFF file.
import javabridge
import bioformats
import numpy as np
import nrrd
import matplotlib.pyplot as plt

# The Java VM must be running before any bioformats call is made.
javabridge.start_vm(class_path=bioformats.JARS)

path_to_data = 'test_data/Nucleisegmentedfill.tif'
depth = 't'  # z or t

# Get XML metadata of complete file
xml_string = bioformats.get_omexml_metadata(path_to_data)
ome = bioformats.OMEXML(xml_string)

# Print the number of images that are in the tif
print(ome.image_count)

# Read the metadata from the first image -> series 0
iome = ome.image(0)
series_count = ome.get_image_count()
image_name = iome.get_Name()
image_id = iome.get_ID()
image_acquisition = iome.get_AcquisitionDate()

print('Series count: ', series_count)
print('Name: ', image_name)
print('ID: ', image_id)
print('Acquisition Date: ', image_acquisition)

# Get the pixel meta data from the image
print('User Selected: %s' %filename) # ##### Show metadata # I am using Python-bioformats version 1.1.0 and, as of 4/26/2016, there is a bug in omexml.py. To fix this bug, # - Go to your path that contains omexml.py (/anaconda/envs/env2_bioformats/lib/python2.7/site-packages/bioformats/ in my case) # - Delete omexml.pyc # - Then open omexml.py and change line 318 from 'isinstance(xml, str):' to 'isinstance(xml, basestring)' # # If you don't do this, you may get an error that says "ascii codec can't encode character" # In[60]: md = bf.get_omexml_metadata(file_full_path) ome = bf.OMEXML(md) iome = ome.image(0) # e.g. first image #print(ome.image_count) print('Image Name: %s' %iome.get_Name()) print('Image ID: %s' %iome.get_ID()) #what is image ID? print('Acquisition Date: %s' %iome.AcquisitionDate) print('') print('Bit Depth: %s' %iome.Pixels.get_PixelType()) print('XYZ Dimensions: %s x %s x %s pixels' %(iome.Pixels.get_SizeX(),iome.Pixels.get_SizeY(),iome.Pixels.get_SizeZ())) print('Time Points: %s' %iome.Pixels.get_SizeT()) print('DimensionOrder: %s' %iome.Pixels.DimensionOrder) #print('get_DimensionOrder: %s' %iome.Pixels.get_DimensionOrder()) #what is the difference between get_DimensionOrder() and DimensionOrder? print('Channels: %s' %iome.Pixels.get_SizeC()) print('Ch1: %s' %iome.Pixels.Channel(0).Name)
def _combine_omexml_elements(self, omexml_images, omexml_metadata):
    # Merge the OMEXML extracted from each image file into one OMEXML object
    # with one Image element per (file, series) pair, and record an
    # ImageFileMapping for every plane.
    #
    # omexml_images: mapping of image file name -> bioformats OMEXML.
    # omexml_metadata: OMEXML from additional metadata files, or None; when
    #     None an empty OMEXML with the required number of Image elements is
    #     created.
    # Returns the combined OMEXML (``omexml_metadata``, updated in place).
    # Raises TypeError / MetadataError when ``omexml_metadata`` has the wrong
    # type or image count.
    logger.info('combine OMEXML elements')
    # We assume here that each image file contains the same number of images.
    # NOTE(review): indexing ``.values()`` and the ``xrange`` calls below
    # require Python 2.
    n_images = omexml_images.values()[0].image_count * len(omexml_images)
    if omexml_metadata is not None:
        extra_omexml_available = True
        if not isinstance(omexml_metadata, bioformats.omexml.OMEXML):
            raise TypeError('Argument "omexml_metadata" must have type '
                            'bioformats.omexml.OMEXML.')
        if omexml_metadata.image_count != n_images:
            raise MetadataError(
                'Number of images in "omexml_metadata" must match '
                'the total number of Image elements in "omexml_images".')
    else:
        extra_omexml_available = False
        omexml_metadata = bioformats.OMEXML(XML_DECLARATION)
        omexml_metadata.image_count = n_images

    # Attributes copied from the extracted elements to the combined ones.
    image_element_attributes = {'AcquisitionDate', 'Name'}
    channel_element_attributes = {'Name'}
    pixel_element_attributes = {
        'PixelType', 'SizeC', 'SizeT', 'SizeX', 'SizeY', 'SizeZ'
    }
    plane_element_attributes = {
        'PositionX', 'PositionY', 'PositionZ', 'TheC', 'TheT', 'TheZ'
    }

    filenames = natsorted(omexml_images)
    # count is the index of the Image element in the combined OMEXML.
    count = 0
    for i, f in enumerate(filenames):
        omexml_img = omexml_images[f]
        n_series = omexml_img.image_count
        for s in xrange(n_series):
            extracted_image = omexml_img.image(s)
            md_image = omexml_metadata.image(count)
            for attr in image_element_attributes:
                extracted_value = getattr(extracted_image, attr)
                if extracted_value is not None:
                    setattr(md_image, attr, extracted_value)

            extracted_pixels = extracted_image.Pixels
            n_planes = extracted_pixels.plane_count
            if n_planes == 0:
                # Sometimes an image doesn't have any plane elements.
                # Let's create them for consistency.
                extracted_pixels = self._create_channel_planes(
                    extracted_pixels)
                n_planes = extracted_pixels.plane_count

            md_pixels = md_image.Pixels
            md_pixels.plane_count = n_planes
            # NOTE(review): plane_count was assigned n_planes on the line
            # above, so this comparison looks like it can never be true -
            # confirm whether the check was meant to run before the
            # assignment.
            if extra_omexml_available and (md_pixels.plane_count != n_planes):
                raise MetadataError(
                    'Image element #%d in OMEXML obtained from additional '
                    'metdata files must have the same number of Plane '
                    'elements as the corresponding Image elements in the '
                    'OMEXML element obtained from image file "%s".' % (i, f))

            for attr in pixel_element_attributes:
                extracted_value = getattr(extracted_pixels, attr)
                if extracted_value is not None:
                    # Extracted values always overwrite, because
                    # python-bioformats fills in arbitrary defaults.
                    setattr(md_pixels, attr, extracted_value)

            for p in xrange(n_planes):
                extracted_plane = extracted_pixels.Plane(p)
                md_plane = md_pixels.Plane(p)
                for attr in plane_element_attributes:
                    extracted_value = getattr(extracted_plane, attr)
                    md_value = getattr(md_plane, attr)
                    # Values already present in the additional metadata win.
                    if md_value is None and extracted_value is not None:
                        setattr(md_plane, attr, extracted_value)

                # Remember which file/series/plane this plane came from.
                fm = ImageFileMapping()
                fm.ref_index = count + p
                fm.files = [f]
                fm.series = [s]
                fm.planes = [p]
                self._file_mapper_list.append(fm)
                self._file_mapper_lut[f].append(fm)

            n_channels = extracted_pixels.channel_count
            # NOTE(review): channel_count is set on the Image wrapper, not on
            # md_pixels - confirm this is intended.
            md_image.channel_count = n_channels
            for c in xrange(n_channels):
                extracted_channel = extracted_pixels.Channel(c)
                md_channel = md_pixels.Channel(c)
                for attr in channel_element_attributes:
                    extracted_value = getattr(extracted_channel, attr)
                    if extracted_value is not None:
                        setattr(md_channel, attr, extracted_value)

            count += 1

    return omexml_metadata
def run_job(self, batch, assume_clean_state=False):
    '''Configures OMEXML metadata extracted from microscope image files and
    complements it with metadata retrieved from additional microscope
    metadata files and/or user input.

    The actual processing is delegated to a format-specific implementation of
    :class:`MetadataHandler <tmlib.workflow.metaconfig.base.MetadataHandler>`.

    Parameters
    ----------
    batch: dict
        job description
    assume_clean_state: bool, optional
        assume that output of previous runs has already been cleaned up

    Raises
    ------
    MetadataError
        when required metadata is still missing after configuration from
        file names
    TypeError
        when stage positions are unavailable and ``batch['n_vertical']`` or
        ``batch['n_horizontal']`` is not an integer

    See also
    --------
    :mod:`tmlib.workflow.metaconfig.cellvoyager`
    '''
    # Fall back to the microscope type's default regular expression when the
    # batch does not provide one.
    regexp = batch.get('regex', '')
    if not regexp:
        regexp = get_microscope_type_regex(
            batch['microscope_type'], as_string=True
        )[0]
    # Load file names and the per-file OMEXML from the database.
    with tm.utils.ExperimentSession(self.experiment_id) as session:
        experiment = session.query(tm.Experiment).one()
        plate_dimensions = experiment.plates[0].dimensions
        acquisition = session.query(tm.Acquisition).\
            get(batch['acquisition_id'])
        metadata_files = session.query(tm.MicroscopeMetadataFile.location).\
            filter_by(acquisition_id=batch['acquisition_id']).\
            all()
        metadata_filenames = [f.location for f in metadata_files]
        image_files = session.query(
                tm.MicroscopeImageFile.name, tm.MicroscopeImageFile.omexml
            ).\
            filter_by(acquisition_id=batch['acquisition_id']).\
            all()
        omexml_images = {
            f.name: bioformats.OMEXML(f.omexml) for f in image_files
        }

    # Read additional metadata files if a reader exists for this microscope.
    MetadataReader = metadata_reader_factory(batch['microscope_type'])
    if MetadataReader is not None:
        with MetadataReader() as mdreader:
            omexml_metadata = mdreader.read(
                metadata_filenames, omexml_images.keys()
            )
    else:
        omexml_metadata = None

    MetadataHandler = metadata_handler_factory(batch['microscope_type'])
    mdhandler = MetadataHandler(omexml_images, omexml_metadata)
    mdhandler.configure_from_omexml()

    missing = mdhandler.determine_missing_metadata()
    if missing:
        logger.warning(
            'required metadata information is missing: "%s"',
            '", "'.join(missing)
        )
        logger.info(
            'try to retrieve missing metadata from filenames '
            'using regular expression'
        )
        if regexp is None:
            logger.warn('no regular expression provided')
        mdhandler.configure_from_filenames(
            plate_dimensions=plate_dimensions, regex=regexp
        )
    missing = mdhandler.determine_missing_metadata()
    if missing:
        raise MetadataError(
            'The following metadata information is missing:\n"%s"\n'
            % '", "'.join(missing)
        )
    # Once we have collected basic metadata such as information about
    # channels and focal planes, we try to determine the relative position
    # of images within the acquisition grid
    try:
        logger.info(
            'try to determine grid coordinates from microscope '
            'stage positions'
        )
        mdhandler.determine_grid_coordinates_from_stage_positions()
    except MetadataError as error:
        logger.warning(
            'microscope stage positions are not available: "%s"'
            % str(error)
        )
        logger.info(
            'try to determine grid coordinates from provided stitch layout'
        )
        # In general, the values of these arguments can be ``None``, because
        # they are not required and may not be used.
        # However, in case the grid coordinates should be determined based
        # on user input, these arguments are required.
        if not isinstance(batch['n_vertical'], int):
            raise TypeError(
                'Value of argument "n_vertical" must be an integer.'
            )
        if not isinstance(batch['n_horizontal'], int):
            raise TypeError(
                'Value of argument "n_horizontal" must be an integer.'
            )
        mdhandler.determine_grid_coordinates_from_layout(
            stitch_layout=batch['stitch_layout'],
            stitch_dimensions=(batch['n_vertical'], batch['n_horizontal'])
        )

    if batch['perform_mip']:
        mdhandler.group_metadata_per_zstack()

    # Create consistent zero-based ids
    mdhandler.update_indices()
    mdhandler.assign_acquisition_site_indices()
    md = mdhandler.remove_redundant_columns()
    fmaps = mdhandler.create_image_file_mappings()

    logger.info('create database entries')

    # Channels are created once; the bit depth of the first metadata row is
    # used for all of them.
    with tm.utils.ExperimentSession(self.experiment_id) as session:
        channels = dict()
        bit_depth = md['bit_depth'][0]
        for ch_name in np.unique(md['channel_name']):
            logger.info('create channel "%s"', ch_name)
            ch = session.get_or_create(
                tm.Channel, experiment_id=self.experiment_id,
                name=ch_name, wavelength=ch_name, bit_depth=bit_depth,
            )
            channels[ch_name] = ch.id

    # One database session per well.
    for w in np.unique(md.well_name):
        with tm.utils.ExperimentSession(self.experiment_id) as session:
            acquisition = session.query(tm.Acquisition).\
                get(batch['acquisition_id'])
            logger.info('create well "%s"', w)
            w_index = (md.well_name == w)
            well = session.get_or_create(
                tm.Well, plate_id=acquisition.plate.id, name=w
            )

            channel_image_files = []
            for s in np.unique(md.loc[w_index, 'site']):
                logger.debug('create site #%d', s)
                s_index = (md.site == s)
                # Position and size are taken from the first row of the site.
                y = md.loc[s_index, 'well_position_y'].values[0]
                x = md.loc[s_index, 'well_position_x'].values[0]
                height = md.loc[s_index, 'height'].values[0]
                width = md.loc[s_index, 'width'].values[0]
                site = session.get_or_create(
                    tm.Site, y=y, x=x, height=height, width=width,
                    well_id=well.id
                )

                for index, i in md.ix[s_index].iterrows():
                    channel_image_files.append(
                        tm.ChannelImageFile(
                            tpoint=i.tpoint, zplane=i.zplane,
                            channel_id=channels[i.channel_name],
                            site_id=site.id, acquisition_id=acquisition.id,
                            file_map=fmaps[index],
                        )
                    )

            # All files of the well are written in one bulk operation.
            session.bulk_save_objects(channel_image_files)