def extract(self, source):
    """Extract an image from the PDS file *source*.

    The labels are parsed first and stored on ``self.labels``. When
    ``_check_image_is_supported()`` accepts them, ``width * height``
    bytes are read from the image location and wrapped in a PIL Image
    of mode ``'L'``.

    Returns a tuple ``(img, labels)``: *img* is the PIL Image, or None
    when the image is not supported; *labels* is the parsed label
    structure (also kept on ``self.labels``).
    """
    p = Parser()
    f = open_pds(source)
    # Everything after the open runs under try/finally so the file is
    # closed even when parsing, seeking or decoding raises.
    try:
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        if self._check_image_is_supported():
            if self.log: self.log.debug("Image in '%s' is supported" % (source))
            dim = self._get_image_dimensions()
            loc = self._get_image_location()
            if self.log: self.log.debug("Image dimensions should be %s" % (str(dim)))
            if self.log: self.log.debug("Seeking to image data at %d" % (loc))
            f.seek(loc)
            if self.log: self.log.debug("Seek successful, reading data")
            # One byte per sample: mode 'L' below is 8-bit greyscale.
            rawImageData = f.read(dim[0] * dim[1])
            if self.log: self.log.debug("Read successful (len: %d), creating Image object" % (len(rawImageData)))
            # The decoder arguments are spelled out in full because the
            # frombuffer defaults may change in a future PIL release.
            img = Image.frombuffer('L', dim, rawImageData, 'raw', 'L', 0, 1)
            if self.log: self.log.debug("Image result: %s" % (str(img)))
            if self.log: self.log.debug("Image info: %s" % (str(img.info)))
            if self.log: self.log.debug("Image size: %s" % (str(img.size)))
        else:
            if self.log: self.log.error("Image is not supported '%s'" % (source))
            img = None
    finally:
        f.close()
    return img, self.labels
def extract(self, source):
    """Extract an image from the PDS file *source*.

    The labels are parsed first and stored on ``self.labels``. When
    ``_check_image_is_supported()`` accepts them, the raw samples are
    read (one byte per sample for 8-bit data, two for 16-bit data),
    optionally verified against the MD5 checksum returned by
    ``_get_image_checksum()``, and converted to a PIL Image.

    Returns a tuple ``(img, labels)``: *img* is the PIL Image, or None
    when the image is not supported; *labels* is the parsed label
    structure (also kept on ``self.labels``).

    Raises ChecksumError when a checksum is present, does not match the
    data read, and ``self.raisesChecksumError`` is true.
    Raises ValueError when SAMPLE_BITS is neither 8 nor 16.
    """
    p = Parser()
    f = open_pds(source)
    # Everything after the open runs under try/finally so the file is
    # closed even when a checksum failure or decoding error is raised.
    try:
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        if self._check_image_is_supported():
            if self.log: self.log.debug("Image in '%s' is supported" % (source))
            dim = self._get_image_dimensions()
            loc = self._get_image_location()
            imageSampleBits = int(self.labels['IMAGE']['SAMPLE_BITS'])
            imageSampleType = self.labels['IMAGE']['SAMPLE_TYPE']
            md5Checksum = self._get_image_checksum()
            if self.log: self.log.debug("Image dimensions should be %s" % (str(dim)))
            if self.log: self.log.debug("Seeking to image data at %d" % (loc))
            f.seek(loc)
            if imageSampleBits == 8:
                readSize = dim[0] * dim[1]
            elif imageSampleBits == 16:
                readSize = dim[0] * dim[1] * 2
            else:
                # Previously this fell through and failed later with an
                # unbound 'readSize'; fail with a clear message instead.
                raise ValueError("Unsupported SAMPLE_BITS value: %d" % (imageSampleBits))
            if self.log: self.log.debug("Seek successful, reading data (%s)" % (readSize))
            rawImageData = f.read(readSize)
            if md5Checksum:
                rawImageChecksum = hashlib.md5(rawImageData).hexdigest()
                if rawImageChecksum != md5Checksum:
                    if self.log: self.log.debug("Secure hash verification failed")
                    # A mismatch is only fatal when the extractor was
                    # configured to raise; otherwise decoding continues.
                    if self.raisesChecksumError:
                        errorMessage = "Verification failed! Expected '%s' but got '%s'." % (md5Checksum, rawImageChecksum)
                        raise ChecksumError(errorMessage)
                else:
                    if self.log: self.log.debug("Secure hash verification passed")
            if self.log: self.log.debug("Read successful (len: %d), creating Image object" % (len(rawImageData)))
            # The decoder arguments are spelled out in full because the
            # frombuffer defaults may change in a future PIL release.
            if (imageSampleBits == 16) and imageSampleType == ('MSB_INTEGER'):
                # Decode big-endian 16-bit samples into a float image,
                # then scale by 1/16 while converting to 8-bit 'L'.
                # NOTE(review): the 1/16 factor presumably assumes
                # 12-bit sample data - confirm against the data sets.
                img = Image.frombuffer('F', dim, rawImageData, 'raw', 'F;16B', 0, 1)
                img = ImageMath.eval("convert(a/16.0, 'L')", a=img)
            else:
                img = Image.frombuffer('L', dim, rawImageData, 'raw', 'L', 0, 1)
            if self.log:
                self.log.debug("Image result: %s" % (str(img)))
                self.log.debug("Image info: %s" % (str(img.info)))
                self.log.debug("Image mode: %s" % (str(img.mode)))
                self.log.debug("Image size: %s" % (str(img.size)))
        else:
            if self.log: self.log.error("Image is not supported '%s'" % (source))
            img = None
    finally:
        f.close()
    return img, self.labels
def extract(self, source):
    """Extract a table from the PDS file *source*.

    The labels are parsed first and stored on ``self.labels``. When
    ``_check_table_is_supported()`` accepts them, the column names are
    read from the table's structure file and, for tables whose
    INTERCHANGE_FORMAT is ASCII, a ``csv.DictReader`` is opened over
    the table data file using those names and a single space as the
    delimiter.

    Returns a tuple ``(tbl, labels)``: *tbl* is the ``csv.DictReader``,
    or None when the table is not supported or is not in ASCII format;
    *labels* is the parsed label structure (also kept on
    ``self.labels``).
    """
    p = Parser()
    f = open_pds(source)
    # Referenced files are resolved relative to the label's directory.
    pdsdatadir = os.path.dirname(source)
    # Default result, so every path (including a supported table in a
    # non-ASCII format) has a defined return value.
    tbl = None
    try:
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        if self._check_table_is_supported():
            if self.log: self.log.debug("Table in '%s' is supported" % (source))
            dim = self._get_table_dimensions()
            if self.log: self.log.debug("Table dimensions should be %s" % (str(dim)))
            # Get the location of the table
            location = self._get_table_location().strip().replace('"', "")
            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace('"', "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)
            sp = ColumnParser()
            s = open_pds(structurefile)
            # The column names are collected before the structure file
            # is closed, in case the parser reads from it lazily.
            try:
                slabels = sp.parse(s)
                columns = [l["COLUMN"]["NAME"].strip().replace('"', "") for l in slabels]
            finally:
                s.close()
            if self.log: self.log.debug("Found %d columns" % (len(columns)))
            if self.labels["TABLE"]["INTERCHANGE_FORMAT"] == "ASCII":
                locationfile = getPdsFileName(location, pdsdatadir)
                # The data file is deliberately left open: DictReader
                # reads rows lazily as the caller iterates.
                tbl = csv.DictReader(open(locationfile), fieldnames=columns, delimiter=" ")
            else:
                if self.log: self.log.error("Table is not supported '%s'" % (source))
        else:
            if self.log: self.log.error("Table is not supported '%s'" % (source))
    finally:
        f.close()
    return tbl, self.labels
def extract(self, source):
    """Extract a table from the PDS file *source*.

    The labels are parsed first and stored on ``self.labels``. When
    ``_check_table_is_supported()`` accepts them, the column names are
    read from the table's structure file and, for tables whose
    INTERCHANGE_FORMAT is ASCII, a ``csv.DictReader`` is opened over
    the table data file using those names and a single space as the
    delimiter.

    Returns a tuple ``(tbl, labels)``: *tbl* is the ``csv.DictReader``,
    or None when the table is not supported or is not in ASCII format;
    *labels* is the parsed label structure (also kept on
    ``self.labels``).
    """
    p = Parser()
    f = open_pds(source)
    # Referenced files are resolved relative to the label's directory.
    pdsdatadir = os.path.dirname(source)
    # Default result, so every path (including a supported table in a
    # non-ASCII format) has a defined return value.
    tbl = None
    try:
        if self.log: self.log.debug("Parsing '%s'" % (source))
        self.labels = p.parse(f)
        if self.log: self.log.debug("Found %d labels" % (len(self.labels)))
        if self._check_table_is_supported():
            if self.log: self.log.debug("Table in '%s' is supported" % (source))
            dim = self._get_table_dimensions()
            if self.log: self.log.debug("Table dimensions should be %s" % (str(dim)))
            # Get the location of the table
            location = self._get_table_location().strip().replace("\"", "")
            # Get the structure of the table from the pointer
            struct_fname = self._get_table_structure().strip().replace("\"", "")
            structurefile = getPdsFileName(struct_fname, pdsdatadir)
            sp = ColumnParser()
            s = open_pds(structurefile)
            # The column names are collected before the structure file
            # is closed, in case the parser reads from it lazily.
            try:
                slabels = sp.parse(s)
                columns = [l['COLUMN']['NAME'].strip().replace("\"", "") for l in slabels]
            finally:
                s.close()
            if self.log: self.log.debug("Found %d columns" % (len(columns)))
            if self.labels['TABLE']['INTERCHANGE_FORMAT'] == 'ASCII':
                locationfile = getPdsFileName(location, pdsdatadir)
                # The data file is deliberately left open: DictReader
                # reads rows lazily as the caller iterates.
                tbl = csv.DictReader(open(locationfile), fieldnames=columns, delimiter=' ')
            else:
                if self.log: self.log.error("Table is not supported '%s'" % (source))
        else:
            if self.log: self.log.error("Table is not supported '%s'" % (source))
    finally:
        f.close()
    return tbl, self.labels
def test_no_exceptions(self):
    """Run the image extractor over every file under the test data dir.

    Each extracted image is saved as a JPEG in the output directory.
    The test passes when no exception escapes; any exception raised by
    extraction or saving propagates and fails the test.
    """
    import os
    from core import open_pds
    testDataDir = '../../test_data/'
    outputDir = '../../tmp/'
    imgExtractor = ImageExtractor(log="ImageExtractor_Unit_Tests")
    for root, dirs, files in os.walk(testDataDir):
        for name in files:
            filename = os.path.join(root, name)
            # Single-argument call form prints the same text on
            # Python 2 and Python 3.
            print(filename)
            # NOTE(review): extract() is handed the result of
            # open_pds() rather than a path - confirm the extractor
            # under test accepts an already-opened source.
            img, _ = imgExtractor.extract(open_pds(filename))
            # No try/except wrapper: an exception from save() already
            # fails the test when it propagates.
            if img:
                img.save(os.path.join(outputDir, name + '.jpg'))