예제 #1
0
    def get_md5_hash(self, measurements):
        """Return the MD5 hex digest of the underlying file, caching it if possible.

        For files that are neither MATLAB nor NumPy files, an image reader is
        obtained for this provider's URL and the digest is stored on it as
        the ``md5_hash`` attribute, so later calls that get a reader carrying
        that attribute return the stored value without hashing again.

        measurements - backup for the case where there is no regular file at
                       the provider's full name (e.g. the image data was
                       retrieved directly from a URL); the pixel data of
                       ``self.provide_image(measurements)`` is hashed instead

        Returns the digest as a hexadecimal string.
        """
        if is_matlab_file(self.__filename) or is_numpy_file(self.__filename):
            # No reader is created for these formats, so nothing can be cached
            rdr = None
        else:
            from bioformats.formatreader import get_image_reader

            rdr = get_image_reader(None, url=self.get_url())

        # Use the digest cached on the reader when there is one
        if rdr is not None and hasattr(rdr, "md5_hash"):
            return rdr.md5_hash

        hasher = hashlib.md5()
        path = self.get_full_name()
        if os.path.isfile(path):
            # Hash the file in 64 KiB chunks so it is never fully in memory
            with open(path, "rb") as fd:
                for buf in iter(lambda: fd.read(65536), b""):
                    hasher.update(buf)
        else:
            # No file here - hash the image
            image = self.provide_image(measurements)
            # tobytes() replaces ndarray.tostring(), which was deprecated and
            # later removed from NumPy; both produce the same bytes.
            hasher.update(image.pixel_data.tobytes())

        digest = hasher.hexdigest()
        if rdr is not None:
            rdr.md5_hash = digest
        return digest
예제 #2
0
    def cache_file(self):
        """Make a local copy of a file that is only reachable through a URL

        The attempt is made at most once per provider: later calls return
        the outcome recorded by the first one.

        Return False if the path (or, when there is no path, the file name)
        exists on the file system, True once the file has been cached.

        Raises IOError if the location does not exist locally and does not
        carry a URL scheme of at least two characters.
        """
        if self.__cacheing_tried:
            return self.__is_cached
        self.__cacheing_tried = True

        folder = self.get_pathname()
        has_folder = len(folder) != 0
        # With no folder, the file name alone is the location to test
        probe = folder if has_folder else self.get_filename()
        if os.path.exists(probe):
            # Reachable through the ordinary file system - handle normally
            return False

        parts = urllib.parse.urlparse(probe)
        if has_folder:
            url = "/".join((folder, self.get_filename()))
            failure = "Test for access to directory failed. Directory: %s"
        else:
            url = probe
            failure = "Test for access to file failed. File: %s"
        #
        # A scheme of length 0 means there is no scheme at all, and a scheme
        # of length 1 is probably a DOS drive letter - neither is a URL
        #
        if len(parts.scheme) < 2:
            raise IOError(failure % probe)

        if parts.scheme == "file":
            # NOTE(review): this converts the value of get_pathname() (which
            # may be empty), not the joined URL - confirm that is intended.
            self.__cached_file = url2pathname(folder)
        elif is_numpy_file(self.__filename):
            #
            # urlretrieve names its temporary file after the suffix of the
            # URL's path component; the explicit ".npy" suffix mirrors that
            #
            handle, local_copy = tempfile.mkstemp(
                suffix=".npy",
                dir=cellprofiler_core.preferences.get_temporary_directory(),
            )
            self.__cached_file = local_copy
            try:
                url = generate_presigned_url(url)
                retrieved = urllib.request.urlretrieve(url, filename=local_copy)
                self.__cached_file = retrieved[0]
            finally:
                os.close(handle)
        else:
            from bioformats.formatreader import get_image_reader

            # The reader exposes the local path it uses as ``path``
            self.__cached_file = get_image_reader(id(self), url=url).path

        self.__is_cached = True
        return True
예제 #3
0
    def __set_image(self):
        """Read the pixel data and store the resulting Image on this provider.

        When ``self.__volume`` is set, the work is delegated to
        ``__set_image_volume``. Otherwise the file is cached if necessary
        and then loaded:

        * MATLAB and NumPy files are loaded with ``load_data_file`` and get
          a scale of 1.0.
        * Anything else is read through an image reader. A scalar or None
          ``self.index`` reads a single plane; any other index is treated
          as a sequence of planes that are read one by one and combined
          with ``numpy.dstack``.

        ``self.rescale`` is passed to the reader only when it is a boolean.
        A float value is applied afterwards as a manual divisor.
        """
        if self.__volume:
            self.__set_image_volume()
            return

        from bioformats.formatreader import get_image_reader

        self.cache_file()
        channel_names = []
        if is_matlab_file(self.__filename):
            img = load_data_file(self.get_full_name(), loadmat)
            self.scale = 1.0
        elif is_numpy_file(self.__filename):
            img = load_data_file(self.get_full_name(), numpy.load)
            self.scale = 1.0
        else:
            rdr = get_image_reader(self.get_name(), url=self.get_url())
            # Only a boolean is forwarded as the reader's rescale flag; a
            # float rescale is handled manually after reading.
            reader_rescale = (
                self.rescale if isinstance(self.rescale, bool) else False
            )
            if numpy.isscalar(self.index) or self.index is None:
                img, self.scale = rdr.read(
                    c=self.channel,
                    series=self.series,
                    index=self.index,
                    rescale=reader_rescale,
                    wants_max_intensity=True,
                    channel_names=channel_names,
                )
            else:
                # It's a stack
                plane_count = len(self.index)

                def per_plane(value):
                    # A single value (scalar or None) applies to every
                    # plane; anything else is taken as one value per plane.
                    # The same test is used for series and channel: the
                    # former inverted channel test handed a scalar channel
                    # straight to zip(), which raised TypeError.
                    if numpy.isscalar(value) or value is None:
                        return [value] * plane_count
                    return value

                stack = []
                for series, index, channel in zip(
                    per_plane(self.series),
                    self.index,
                    per_plane(self.channel),
                ):
                    # self.scale ends up holding the scale of the last plane
                    img, self.scale = rdr.read(
                        c=channel,
                        series=series,
                        index=index,
                        rescale=reader_rescale,
                        wants_max_intensity=True,
                        channel_names=channel_names,
                    )
                    stack.append(img)
                img = numpy.dstack(stack)
        if isinstance(self.rescale, float):
            # Apply a manual rescale
            img = img.astype(numpy.float32) / self.rescale
        self.__image = Image(
            img,
            path_name=self.get_pathname(),
            file_name=self.get_filename(),
            scale=self.scale,
        )
        # Attach channel names only when there is exactly one per plane of
        # the third axis
        if img.ndim == 3 and len(channel_names) == img.shape[2]:
            self.__image.channel_names = list(channel_names)