def get_md5_hash(self, measurements):
    """Compute the MD5 hash of the underlying file or use cached value

    For MATLAB and NumPy files no image reader is consulted and the digest
    is computed on every call. For every other file the digest is cached
    on the image reader as its ``md5_hash`` attribute and reused by later
    calls.

    measurements - backup for case where MD5 is calculated on image data
                   directly retrieved from URL; it is passed to
                   provide_image() when get_full_name() does not name a
                   regular file.

    Returns the hexadecimal MD5 digest as a string.
    """
    #
    # Cache the MD5 hash on the image reader
    #
    if is_matlab_file(self.__filename) or is_numpy_file(self.__filename):
        rdr = None
    else:
        from bioformats.formatreader import get_image_reader

        rdr = get_image_reader(None, url=self.get_url())

    if rdr is not None and hasattr(rdr, "md5_hash"):
        return rdr.md5_hash

    hasher = hashlib.md5()
    path = self.get_full_name()
    if os.path.isfile(path):
        # Hash in 64 KiB chunks so the whole file is never held in memory.
        with open(path, "rb") as fd:
            for buf in iter(lambda: fd.read(65536), b""):
                hasher.update(buf)
    else:
        # No file here - hash the image
        image = self.provide_image(measurements)
        # tobytes() yields the same bytes as the removed ndarray.tostring().
        hasher.update(image.pixel_data.tobytes())

    digest = hasher.hexdigest()
    if rdr is not None:
        rdr.md5_hash = digest
    return digest
def cache_file(self):
    """Cache a file that needs to be HTTP downloaded

    Only the first call does any work; later calls return the outcome
    remembered from that first attempt.

    Return True if the file has been cached, False if the file (or its
    directory) already exists on the local file system.

    Raises IOError if the location does not exist locally and does not
    parse as a URL with a scheme of at least two characters.
    """
    if self.__cacheing_tried:
        return self.__is_cached
    self.__cacheing_tried = True
    #
    # Check to see if the pathname can be accessed as a directory
    # If so, handle normally
    #
    path = self.get_pathname()
    if len(path) == 0:
        filename = self.get_filename()
        if os.path.exists(filename):
            return False
        parsed_path = urllib.parse.urlparse(filename)
        url = filename
        if len(parsed_path.scheme) < 2:
            raise IOError("Test for access to file failed. File: %s" % filename)
    elif os.path.exists(path):
        return False
    else:
        parsed_path = urllib.parse.urlparse(path)
        url = "/".join((path, self.get_filename()))
        #
        # Scheme length == 0 means no scheme
        # Scheme length == 1 - probably DOS drive letter
        #
        if len(parsed_path.scheme) < 2:
            raise IOError(
                "Test for access to directory failed. Directory: %s" % path
            )

    if parsed_path.scheme == "file":
        # NOTE(review): this converts the directory `path`, not the full
        # `url` - confirm that is what readers of the cached file expect.
        self.__cached_file = url2pathname(path)
    elif is_numpy_file(self.__filename):
        #
        # urlretrieve uses the suffix of the path component of the URL
        # to name the temporary file, so we replicate that behavior
        #
        temp_dir = cellprofiler_core.preferences.get_temporary_directory()
        tempfd, temppath = tempfile.mkstemp(suffix=".npy", dir=temp_dir)
        # urlretrieve reopens the file by name, so the descriptor from
        # mkstemp is not needed while downloading.
        os.close(tempfd)
        downloaded = False
        try:
            url = generate_presigned_url(url)
            self.__cached_file, _ = urllib.request.urlretrieve(
                url, filename=temppath
            )
            downloaded = True
        finally:
            if not downloaded:
                # Do not leave an empty temporary file behind when the
                # download fails; removal is best-effort so the original
                # error is the one that propagates.
                try:
                    os.remove(temppath)
                except OSError:
                    pass
    else:
        from bioformats.formatreader import get_image_reader

        rdr = get_image_reader(id(self), url=url)
        self.__cached_file = rdr.path
    self.__is_cached = True
    return True
def __set_image(self):
    """Load the pixel data and store it as this provider's Image.

    Volumes are delegated to __set_image_volume(). Otherwise the file is
    cached via cache_file() and then loaded with loadmat (MATLAB files),
    numpy.load (NumPy files) or a bioformats image reader (everything
    else). With a scalar or missing index a single read is made; with a
    sequence of indices one plane is read per index and the planes are
    stacked along the third axis.

    A float ``self.rescale`` is applied as a manual divisor after loading;
    a bool ``self.rescale`` is forwarded to the reader instead.

    Sets ``self.scale`` and the cached image, and attaches channel names
    when their count matches the image's third dimension.
    """
    if self.__volume:
        self.__set_image_volume()
        return

    from bioformats.formatreader import get_image_reader

    self.cache_file()
    channel_names = []
    if is_matlab_file(self.__filename):
        img = load_data_file(self.get_full_name(), loadmat)
        self.scale = 1.0
    elif is_numpy_file(self.__filename):
        img = load_data_file(self.get_full_name(), numpy.load)
        self.scale = 1.0
    else:
        rdr = get_image_reader(self.get_name(), url=self.get_url())
        # Only a boolean setting is forwarded to the reader; a float is
        # applied as a manual divisor after loading.
        reader_rescale = self.rescale if isinstance(self.rescale, bool) else False
        if numpy.isscalar(self.index) or self.index is None:
            img, self.scale = rdr.read(
                c=self.channel,
                series=self.series,
                index=self.index,
                rescale=reader_rescale,
                wants_max_intensity=True,
                channel_names=channel_names,
            )
        else:
            # It's a stack
            plane_count = len(self.index)
            # A single (or missing) series applies to every plane.
            if self.series is None or numpy.isscalar(self.series):
                series_list = [self.series] * plane_count
            else:
                series_list = self.series
            stack = []
            # The channel setting is passed unchanged to every read; a
            # scalar channel used to be handed to zip() and fail.
            for series, index in zip(series_list, self.index):
                plane, self.scale = rdr.read(
                    c=self.channel,
                    series=series,
                    index=index,
                    rescale=reader_rescale,
                    wants_max_intensity=True,
                    channel_names=channel_names,
                )
                stack.append(plane)
            img = numpy.dstack(stack)

    if isinstance(self.rescale, float):
        # Apply a manual rescale
        img = img.astype(numpy.float32) / self.rescale

    self.__image = Image(
        img,
        path_name=self.get_pathname(),
        file_name=self.get_filename(),
        scale=self.scale,
    )
    if img.ndim == 3 and len(channel_names) == img.shape[2]:
        self.__image.channel_names = list(channel_names)