def convert_pdf(filename, output_path, resolution=150):
    """Convert every page of a PDF into a PNG image.

    The first page is written as ``{pdf_filename}.png`` and each following
    page as ``{pdf_filename}-{page_index}.png`` (the second page gets index
    1), all inside ``output_path``. The alpha channel of every page is
    removed and replaced with a white background.

    :param filename: path of the PDF to read.
    :param output_path: directory that receives the PNG files.
    :param resolution: rendering resolution handed to ``Image``.
    """
    base_name = os.path.splitext(os.path.basename(filename))[0]

    # Manage the whole document with ``with`` so it is released even when
    # converting or saving one of the pages fails.
    with Image(filename=filename, resolution=resolution) as all_pages:
        for i, page in enumerate(all_pages.sequence):
            with Image(page) as img:
                img.format = 'png'
                # The background must be set before the alpha channel is
                # removed: it is the colour the page is flattened onto.
                img.background_color = Color('white')
                img.alpha_channel = 'remove'

                if i == 0:
                    image_filename = '{}.png'.format(base_name)
                else:
                    image_filename = '{}-{}.png'.format(base_name, i)

                img.save(filename=os.path.join(output_path, image_filename))
def image_to_jpeg_wand(self, jpeg, preview_dims=None):
    """Convert an image (jpeg, gif or bmp) into an in-memory JPEG using wand.

    :param jpeg: source image, handed to ``WImage`` through its ``file``
        argument.
    :param preview_dims: object exposing ``width`` and ``height``; the image
        is resized to the dimensions ``compute_resize_dims`` derives from it.
        When ``None`` the image keeps its original size.
    :return: ``BytesIO`` positioned at offset 0 holding the JPEG bytes.
    """
    logging.info('Converting image to jpeg using wand')
    with WImage(file=jpeg, background=Color('white')) as image:
        # ``None`` is the declared default, so it must not be dereferenced.
        if preview_dims is not None:
            preview_dims = ImgDims(width=preview_dims.width,
                                   height=preview_dims.height)
            resize_dim = compute_resize_dims(
                dims_in=ImgDims(width=image.size[0], height=image.size[1]),
                dims_out=preview_dims)
            image.resize(resize_dim.width, resize_dim.height)
        content_as_bytes = image.make_blob('jpeg')

    # A BytesIO built from initial bytes already starts at position 0.
    return BytesIO(content_as_bytes)
# ----get_model----
# Load the detector weights on the CPU and switch the network to eval mode.
net = PixelAnchornet(pretrained=False)  # .to(device)
model_checkpoint = torch.load(pre_model_weight,
                              map_location=torch.device('cpu'))
net.load_state_dict(model_checkpoint)
net.eval()

# Run detection on every sample; a PDF sample contributes one image per page.
for i, sample in enumerate(sample_list):
    images = []
    img_path = os.path.join(sample_dir, sample)
    if sample[-4:] == '.pdf':
        with Image(filename=img_path, resolution=(200, 200)) as imgs:
            num_page = len(imgs.sequence)
            # Use a dedicated page index: reusing ``i`` here overwrote the
            # sample index that the output file name below relies on.
            for page_idx in range(num_page):
                # Close each per-page image once its pixels have been copied
                # into a numpy array.
                with Image(image=imgs.sequence[page_idx]) as page_img:
                    page_img.alpha_channel = False
                    page_img.background_color = Color(
                        'white')  # Set the background color
                    img = PI.fromarray(np.array(page_img), 'RGB')
                im_name = sample[:-4] + '_p' + str(page_idx)
                images.append((img, im_name))
    else:
        im_name = sample[:-4]
        img = PI.open(img_path).convert('RGB')
        images = [(img, im_name)]

    for img, im_name in images:
        print('-----------------传入图片的img.size:', img.size)
        pixle_anchor_detect(img,
                            net,
                            NMS_choice='ssd',
                            img_save_pths=os.path.join(
                                image_save_path, f'{im_name}_{i}.jpg'),
                            img_size=2048)
def pattern_tile(import_png, format, dir_path):
    """Cut a full-size pattern PNG into printable tiles and convert them to PDF.

    The image is tagged as 300 pixels per inch, optionally overlaid with the
    cut guides of the chosen sheet format, and cropped into tiles saved as
    ``<dir_path><import_png stem>-NN.png``. Afterwards every ``*-*.png``
    found under ``dir_path`` gets a white 150 px border, registration lines,
    a footer text and a page number, and is saved next to itself as a PDF.

    :param import_png: path of the exported pattern PNG.
    :param format: sheet format: ``letter``, ``tabloid`` or ``a0`` (any case).
    :param dir_path: prefix prepended to tile names and used for the glob.
    """
    # formats accepted
    format = format.lower()  # make case-insensitive
    if format == 'letter':
        tile_width = 7.5
        tile_height = 10
        guides_png = 'guides/cut_guides_letter.png'
    elif format == 'tabloid':
        tile_width = 15
        tile_height = 10
        guides_png = 'guides/cut_guides_tabloid.png'
    elif format == 'a0':
        tile_width = 45
        tile_height = 30
        guides_png = None
    else:
        # The value was lowercased above, so matching is case insensitive.
        print("Format must be letter, tabloid, or a0. Case insensitive.")
        exit()

    # constants
    ppi = 300
    ppi_width = round(tile_width * ppi)
    ppi_height = round(tile_height * ppi)

    #### Full size Exported PNG operations ####
    with Image(filename=import_png) as img:
        img.units = 'pixelsperinch'
        img.resolution = 300
        print(import_png + ' is ' + str(img.width / ppi) + ' by ' +
              str(img.height / ppi) + ' at ' + str(img.resolution[0]) + ' ' +
              img.units)

        #### Alignment Guides ####
        if guides_png:
            with Image(filename=guides_png) as guides:
                img.composite(guides, left=0, top=0)

        #### Tile Img ####
        tile_stem = dir_path + os.path.splitext(import_png)[0]
        currentx = 0
        currenty = 0
        i = 1
        # Walk the image row by row, left to right, one tile per step.
        while currenty < img.height:
            while currentx < img.width:
                with img.clone() as cloned:
                    cloned.crop(left=currentx,
                                top=currenty,
                                width=ppi_width,
                                height=ppi_height)
                    tile_id = tile_stem + "-" + str(i).zfill(2) + ".png"
                    cloned.save(filename=tile_id)
                currentx += ppi_width
                i += 1
            currenty += ppi_height
            currentx = 0
        print("Tiled", i - 1, format, "pattern sheets")

    #### Single Tile Operations ####
    # Collect the tiled pngs once; an empty list simply skips the loop.
    png_list = sorted(glob.glob(dir_path + '*-*.png'))
    pg_num = 1
    for i in png_list:
        with Image(filename=i) as img:
            img.units = 'pixelsperinch'
            img.resolution = 300

            # remove alpha channel and fill white
            if img.alpha_channel:
                print("Removing alpha channel from ", i)
                # The background has to be set first: removing the alpha
                # channel flattens the image onto the current background.
                img.background_color = Color('white')
                img.alpha_channel = 'remove'

            # Add border 0.5 in = 150/300
            img.border('white', 150, 150)

            # Add lines and texts
            with Drawing() as draw:
                # cut registration lines
                draw.push()
                draw.stroke_color = Color('grey50')
                draw.stroke_width = 3
                draw.fill_opacity = 0
                draw.path_start()
                draw.path_move(to=(img.width, 150))  # top
                draw.path_horizontal_line(1)
                draw.path_move(to=(150, img.height))  # left
                draw.path_vertical_line(1)
                draw.path_move(to=(img.width, img.height - 150))  # bottom
                draw.path_horizontal_line(1)
                draw.path_move(to=(img.width - 150, img.height))  # right
                draw.path_vertical_line(1)
                draw.path_close()
                draw.path_finish()
                draw.pop()

                # texts
                draw.push()
                draw.font_size = 32
                draw.text_alignment = 'center'
                draw.fill_color = Color('grey50')
                draw.font_family = "Arial"
                draw.text(int(img.width / 2), int(img.height - 100),
                          "2021 © LearnMYOG.com")
                draw.text(int(img.width - 100), int(img.height - 100),
                          str(pg_num))
                draw(img)
                draw.pop()

            # Convert to PDF
            with img.convert('pdf') as converted:
                fileout = os.path.splitext(i)[0] + '.pdf'
                converted.save(filename=fileout)
                print("Converted", i, "from", img.format, "to", fileout,
                      converted.format)
        pg_num += 1
def rotate(img, max_deg=360):
    """Rotate ``img`` in place by a random angle, then crop around its center.

    The angle is drawn uniformly between 0 and ``max_deg`` and truncated to
    an integer. Corners exposed by the rotation are filled with
    ``rgb(132,132,132)``. The rotated image is then cropped, centered, to
    60% of its width and height.
    """
    angle = int(np.random.uniform(0, max_deg))
    fill = Color('rgb(132,132,132)')
    img.rotate(angle, background=fill)

    # Size is read after the rotation, so the crop is relative to the
    # rotated canvas.
    rotated_width, rotated_height = img.size
    crop_width = int(0.6 * rotated_width)
    crop_height = int(0.6 * rotated_height)
    img.crop(width=crop_width, height=crop_height, gravity='center')
def find_names(filename, rot_fl=0, blur=0):
    """Render a document to per-page images and pre-process them.

    Every page is rendered at 300 dpi, flattened onto a white background and
    encoded as JPEG. Each page is then turned into a grayscale array,
    passed through ``preprocess_for_image`` and rotated.

    :param filename: path of the document to load.
    :param rot_fl: when ``1`` the rotation angle comes from
        ``rotation_spacing``; otherwise the page is warped by 0 degrees.
    :param blur: forwarded to ``preprocess_for_image``.
    :return: ``(search_terms, conv_img_list, gray_list)``. ``search_terms``
        is always empty here because no name extraction is performed.
    """
    req_image = []
    conv_img_list = []
    gray_list = []
    search_terms = []

    with Image(filename=filename, resolution=300) as source:
        source.compression_quality = 99
        # ``convert`` returns a new image; manage it with ``with`` so it is
        # released even when processing a page raises.
        with source.convert('jpeg') as image_jpeg:
            for img in image_jpeg.sequence:
                with Image(image=img) as img_page:
                    img_page.background_color = Color('white')
                    img_page.alpha_channel = 'remove'
                    req_image.append(img_page.make_blob('jpeg'))

    for img in req_image:
        conv_img = PI.open(io.BytesIO(img))
        conv_img = np.asarray(conv_img, dtype=np.uint8)
        if len(conv_img.shape) == 3:
            # NOTE(review): PIL presumably yields RGB channel order, so
            # BGR2GRAY swaps the red/blue weights - confirm this is intended.
            gray = cv2.cvtColor(conv_img, cv2.COLOR_BGR2GRAY)
        else:
            gray = conv_img
        gray = preprocess_for_image(gray, blur)

        # Rotate images
        if rot_fl == 1:
            rot = rotation_spacing(gray)
        else:
            rot = 90.0

        rows, cols = gray.shape
        # The applied angle is (90 - rot), i.e. 0 when no rotation is asked.
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90 - rot, 1)
        gray = cv2.warpAffine(gray, M, (cols, rows))
        conv_img = cv2.warpAffine(conv_img, M, (cols, rows))

        conv_img_list.append(conv_img)
        gray_list.append(gray)

    print('SEARCH TERMS:', search_terms)
    return search_terms, conv_img_list, gray_list
def create_snapshot(self):
    """Generate a PNG snapshot for the attachment of this document.

    Nothing is generated when there is no attachment, no attachment path,
    or the attached file is missing on disk. The mimetype is re-guessed
    from the attachment path. Images are delegated to
    ``helpers.generate_snapshot``; PDFs are rendered with wand, using the
    page given by ``thumbnail_page`` in the JSON ``contents`` (page 0 when
    absent or unreadable). On success ``self.snapshot`` is set and
    ``self._dirty`` is raised; the method itself does not save the model.
    """
    logger.debug('document {pk:%s, mimetype:%s, type:%s} init snapshot' %
                 (self.pk, self.mimetype, self.type))

    # Guard: an attachment with a filesystem path is required.
    if not self.attachment or not getattr(self.attachment, 'path', None):
        logger.debug('document {pk:%s} snapshot cannot be generated.' %
                     self.pk)
        return

    # Guard: the attached file must exist on disk.
    if not os.path.exists(self.attachment.path):
        logger.debug(
            'document {pk:%s} snapshot cannot be generated, attached file does not exist.'
            % self.pk)
        return

    # reconsider mimetype: trust the one guessed from the file name, if any
    mimetype, encoding = mimetypes.guess_type(self.attachment.path,
                                              strict=True)
    if mimetype:
        self.mimetype = mimetype

    logger.debug(
        'document {pk:%s, mimetype:%s, type:%s} snapshot can be generated' %
        (self.pk, self.mimetype, self.type))

    filename = '%s.snapshot.png' % self.short_url
    outfile = os.path.join(settings.MEDIA_ROOT,
                           snapshot_attachment_file_name(self, filename))

    # generate dir if there is none
    try:
        os.makedirs(os.path.dirname(outfile))
    except OSError:
        # Any OSError is swallowed here, including "directory already exists".
        logger.debug(
            'document {pk:%s, mimetype:%s, type:%s} creating folder for snapshot'
            % (self.pk, self.mimetype, self.type))
        pass

    # generate thumbnail
    # NOTE(review): self.mimetype.split would fail if the mimetype is still
    # None at this point - confirm callers always set it.
    if self.mimetype.split(
            '/'
    )[0] == 'image' or self.type == Document.IMAGE or self.type == Document.PHOTO:
        logger.debug(
            'document {pk:%s, mimetype:%s, type:%s} generating IMAGE thumbnail...'
            % (self.pk, self.mimetype, self.type))

        # generate snapshot
        d = helpers.generate_snapshot(filename=self.attachment.path,
                                      output=outfile,
                                      width=settings.MILLER_SNAPSHOT_WIDTH,
                                      height=settings.MILLER_SNAPSHOT_HEIGHT)
        # Merge whatever the helper returned into the document data.
        if d:
            self.data.update(d)

        self.snapshot = snapshot_attachment_file_name(self, filename)
        self._dirty = True
        logger.debug(
            'document {pk:%s, mimetype:%s, type:%s} IMAGE thumbnail done.' %
            (self.pk, self.mimetype, self.type))

    elif self.mimetype == 'application/pdf':
        logger.debug(
            'document {pk:%s, mimetype:%s, type:%s} generating PDF snapshot...'
            % (self.pk, self.mimetype, self.type))

        pdffile = self.attachment.path
        # The reader object is not used below; constructing it still opens
        # and parses the file outside of any try block.
        pdf_im = PyPDF2.PdfFileReader(pdffile)

        # get page: optional 'thumbnail_page' key of the JSON contents
        page = 0
        try:
            metadata = json.loads(self.contents)
            page = int(metadata['thumbnail_page']
                       ) if 'thumbnail_page' in metadata else 0
        except Exception as e:
            # Unreadable contents: keep page 0 and just log the problem.
            logger.exception(e)

        try:
            # Render the selected page ('file[page]' syntax) to PNG
            with Image(filename='%s[%s]' % (pdffile, page),
                       resolution=150) as img:
                img.format = 'png'
                img.background_color = Color(
                    'white')  # Set white background.
                img.alpha_channel = 'remove'
                img.save(filename=outfile)

            self.snapshot = snapshot_attachment_file_name(self, filename)
            self._dirty = True
        except Exception as e:
            # Best effort: a failed rendering is logged, never raised.
            logger.exception(e)
            print 'could not save snapshot of the required resource', self.pk
        else:
            logger.debug(
                'snapshot generated for document {pk:%s}, page %s' %
                (self.pk, page))
def process_images(identifier,
                   downloaded_image_list,
                   post_gif,
                   use_wand=True,
                   use_imageio=True):
    """Convert/resize all images to png and optionally bundle them in a GIF.

    The first page of every input file is converted to PNG (trimmed, white
    background) and saved under a new name. Images larger than the capped
    average dimension are scaled down in place. With ``post_gif`` the images
    are centered on a common white canvas and assembled into
    ``{identifier}/{identifier}.gif``; trailing frames are dropped until the
    GIF is no larger than ``MAX_IMG_SIZE`` bytes.

    :param identifier: name of the directory and stem of the GIF file.
    :param downloaded_image_list: paths of the files to convert.
    :param post_gif: build the GIF and return only its path when truthy.
    :param use_wand: run the wand conversion/scaling steps; when falsy no
        image is converted and the resulting list is empty.
    :param use_imageio: assemble the GIF with imageio instead of the
        external ``convert`` command.
    :return: sorted list of converted image paths, or a one-element list
        with the GIF path when ``post_gif`` is set.
    """
    logger.info("Processing %d images." % len(downloaded_image_list))
    logger.debug(
        "process_images(): identifier = {}, downloaded_image_list = {},\
 use_wand = {}, use_imageio = {}".format(
            identifier, downloaded_image_list, use_wand, use_imageio))
    image_list = []
    images_for_gif = []
    max_dim = [0, 0]
    new_image_format = "png"

    # also calculate average dimensions to scale down very large images
    dim_list_x = []
    dim_list_y = []

    # first loop to find maximum PDF dimensions to have high quality images
    for image_file in downloaded_image_list:
        if not use_wand:
            continue
        try:
            # "[0]" selects the first page/frame of the file only
            with Image(filename="{}[0]".format(image_file)) as img:
                img.format = new_image_format
                img.background_color = Color("white")
                img.compression_quality = 85  # was 75
                img.alpha_channel = "remove"
                img.trim(fuzz=0.01)
                img.reset_coords()  # equivalent of repage

                # give the file a different name
                filesplit = image_file.rsplit(".", 1)
                filename = filesplit[0] + "_." + filesplit[1]
                if filename.endswith("pdf"):
                    filename = filename.replace(".pdf",
                                                ".%s" % new_image_format)

                # save image in list
                image_list.append(filename)
                img.save(filename=filename)
                dim_list_x.append(img.size[0])
                dim_list_y.append(img.size[1])

                # need to save max dimensions for gif canvas
                for i, _ in enumerate(max_dim):
                    if img.size[i] > max_dim[i]:
                        max_dim[i] = img.size[i]
        except CorruptImageError as corrupt_except:
            print(corrupt_except)
            print("Ignoring", image_file)
        except Exception as general_exception:  # pylint: disable=broad-except
            # Best effort: one unreadable file must not stop the others.
            print(general_exception)

    # rescale images
    average_dims = (
        float(sum(dim_list_x)) / max(len(dim_list_x), 1),
        float(sum(dim_list_y)) / max(len(dim_list_y), 1),
    )
    # Largest of the capped average width and capped average HEIGHT. The
    # width used to be taken twice, ignoring the height altogether.
    dim_xy = int(
        max(min(MAX_IMG_DIM, average_dims[0]),
            min(MAX_IMG_DIM, average_dims[1])))

    # reset max_dim again
    max_dim = [0, 0]

    # scale individual images
    for image_file in image_list:
        if not use_wand:
            continue
        with Image(filename=image_file) as img:
            if (img.size[0] > dim_xy) or (img.size[1] > dim_xy):
                scale_factor = dim_xy / float(max(img.size[0], img.size[1]))
                img.resize(int(img.size[0] * scale_factor),
                           int(img.size[1] * scale_factor))
            for i, _ in enumerate(max_dim):
                if img.size[i] > max_dim[i]:
                    max_dim[i] = img.size[i]
            img.save(filename=image_file)

    # bring list in order again
    image_list = sorted(image_list)

    if post_gif:
        gif_path = "{id}/{id}.gif".format(id=identifier)

        # now we need another loop to create the gif canvas
        add_margin = 1.03
        for image_file in image_list:
            with Image(filename=image_file) as foreground:
                foreground.format = "gif"
                image_file = image_file.replace(".%s" % new_image_format,
                                                ".gif")
                # Center the frame on a slightly enlarged white canvas.
                with Image(
                        width=int(max_dim[0] * add_margin),
                        height=int(max_dim[1] * add_margin),
                        background=Color("white"),
                ) as out:
                    left = int(
                        (max_dim[0] * add_margin - foreground.size[0]) / 2)
                    top = int(
                        (max_dim[1] * add_margin - foreground.size[1]) / 2)
                    out.composite(foreground, left=left, top=top)
                    out.save(filename=image_file)

            if use_imageio:
                images_for_gif.append(imageio.imread(image_file))
            else:
                images_for_gif.append(image_file)

        img_size = MAX_IMG_SIZE + 1
        # the gif can only have a certain size, so we loop until it's small enough
        # NOTE(review): nothing stops this loop once every frame has been
        # dropped - confirm a single frame always fits within MAX_IMG_SIZE.
        while img_size > MAX_IMG_SIZE:
            if use_imageio:
                imageio.mimsave(
                    gif_path,
                    images_for_gif,
                    format="GIF-FI",
                    duration=2,
                    quantizer="nq",
                    palettesize=256,
                )
            else:
                command = "convert -delay 200 -loop 0 "
                command += " ".join(images_for_gif)
                command += " {id}/{id}.gif".format(id=identifier)
                execute_command(command)

            img_size = os.path.getsize(gif_path)
            if img_size > MAX_IMG_SIZE:
                images_for_gif = images_for_gif[:-1]
                logger.info(
                    "Image to big ({} bytes), dropping last figure, {} images in GIF"
                    .format(img_size, len(images_for_gif)))

        # replace image list by GIF only
        image_list = [gif_path]

    return image_list
def find_names(filename, rot_fl=0, blur=0):
    """Extract candidate names / sensitive terms from a document by OCR + NER.

    Each page is rendered at 300 dpi, converted to grayscale, pre-processed,
    rotated and OCR'd with pytesseract. The page text is analysed with
    ``ner_extraction`` and ``nlp_sp``; matching tokens are collected unless
    they appear in the ``fin_terms`` stop list.

    :param filename: path of the document to load.
    :param rot_fl: when ``1`` the rotation angle comes from
        ``rotation_spacing``; otherwise the page is warped by 0 degrees.
    :param blur: forwarded to ``preprocess_for_image``.
    :return: ``(search_terms, conv_img_list, gray_list)``.
    """
    # load the example image and convert it to grayscale
    # NOTE(review): image_pdf / image_jpeg (and img_page below) are never
    # closed or destroyed in this function.
    image_pdf = Image(filename=filename, resolution=300)
    image_jpeg = image_pdf.convert('jpeg')
    req_image = []  # JPEG blob of every page
    conv_img_list = []  # rotated page arrays
    gray_list = []  # rotated, pre-processed grayscale arrays
    search_terms = []  # terms from ner_extraction tokens and templates()
    search_terms_sp = []  # terms from nlp_sp entities
    doc_text = ''  # OCR text of all pages, concatenated
    # Capitalised words that must not be reported as names.
    fin_terms = [
        'Address', 'Administration', 'Age', 'Agree', 'Agreement', 'Allowance',
        'Analysis', 'Annual', 'Approx', 'Assurance', 'Authority',
        'Authorisation', 'Balanced', 'Bank', 'Benefit', 'Birth', 'Budget',
        'Business', 'Capita', 'Capital', 'Capitalised', 'Cash', 'Centre',
        'Charge', 'Choice', 'Civil', 'Commencement', 'Comparison',
        'Conclusion', 'Condition', 'Confident', 'Confidential',
        'Confirmation', 'Consumer', 'Contribution', 'Control', 'Critical',
        'Customs', 'Data', 'Date', 'Death', 'Deed', 'Definition',
        'Department', 'Detail', 'Direct', 'Disagree', 'Discretionary',
        'Discuss', 'Employment', 'Emerging', 'Entitlement', 'Equity',
        'European', 'Fact', 'FAQ', 'Feature', 'Fee', 'File', 'Final',
        'Financial', 'Flexibility', 'Forename', 'Free', 'Full', 'Fund',
        'General', 'Government', 'Growth', 'Guide', 'Health', 'Income',
        'Increase', 'Identified', 'Index', 'Industry', 'Information',
        'Insignificant', 'Insurance', 'Interest', 'International',
        'Investment', 'Investor', 'Legal', 'Life', 'Lifetime', 'Limited',
        'Lower', 'Lump', 'Marital', 'Member', 'Membership', 'Mobile', 'Money',
        'Mutual', 'National', 'Nominated', 'Normal', 'Note', 'Number',
        'Offer', 'Office', 'Ongoing', 'Option', 'Outcome', 'Partnership',
        'Paying', 'Pension', 'Percentage', 'Period', 'Personal', 'Phone',
        'Please', 'Portfolio', 'Post', 'Price', 'Profile', 'Protection',
        'Purchase', 'Rate', 'Reason', 'Recommendation', 'Reduce', 'Reduction',
        'Reference', 'Register', 'Registered', 'Regulation', 'Regulator',
        'Report', 'Research', 'Request', 'Result', 'Retail', 'Retirement',
        'Revenue', 'Risk', 'Salary', 'Saving', 'Scheme', 'Section', 'Service',
        'Solution', 'Spouse', 'Stakeholder', 'State', 'Statement',
        'Statistics', 'Status', 'Subject', 'Sum', 'Summary', 'Support',
        'Surname', 'Tax', 'Taxation', 'Tel', 'Telephone', 'Total', 'Transfer',
        'Trust', 'Trustee', 'Type', 'Typical', 'Typically', 'Unauthorised',
        'Unit', 'Value', 'Version', 'Wealth', 'Yield', 'Your', 'Yours'
    ]

    # Flatten every page onto white and keep it as a JPEG blob.
    for img in image_jpeg.sequence:
        img_page = Image(image=img)
        img_page.background_color = Color('white')
        img_page.alpha_channel = 'remove'
        req_image.append(img_page.make_blob('jpeg'))

    for img in req_image:
        # txt = pytesseract.image_to_string(PI.open(io.BytesIO(img)))
        conv_img = PI.open(io.BytesIO(img))
        conv_img = np.asarray(conv_img, dtype=np.uint8)
        if len(conv_img.shape) == 3:
            #conv_img = cv2.cvtColor(conv_img, cv2.COLOR_RGB2BGR)
            # NOTE(review): PIL presumably yields RGB order while BGR2GRAY
            # is applied - confirm this is intended.
            gray = cv2.cvtColor(conv_img, cv2.COLOR_BGR2GRAY)
        else:
            gray = conv_img
        gray = preprocess_for_image(gray, blur)

        # Rotate images
        if rot_fl == 1:
            rot = rotation_spacing(gray)
        else:
            rot = 90.0

        rows, cols = gray.shape
        # The applied angle is (90 - rot), i.e. 0 when rot_fl is not 1.
        M = cv2.getRotationMatrix2D((cols / 2, rows / 2), 90 - rot, 1)
        gray = cv2.warpAffine(gray, M, (cols, rows))
        conv_img = cv2.warpAffine(conv_img, M, (cols, rows))

        page_text = pytesseract.image_to_string(
            gray, config='--psm 4 -c textord_heavy_nr=1')
        print(page_text)
        doc_text = doc_text + page_text
        conv_img_list.append(conv_img)
        gray_list.append(gray)

        # NLP analysis
        nlp_result = ner_extraction(page_text)
        nlp_result_sp = nlp_sp(page_text)
        labels = set([w.label_ for w in nlp_result_sp.ents])
        in_labels = ['PERSON', 'ORG', 'GPE', 'LOC', 'FAC']
        others = []  # words tagged 'O' by ner_extraction on this page

        for sen in nlp_result["sentences"]:
            for tok in sen['tokens']:
                #print(tok)
                # Keep named-entity tokens that are new, longer than one
                # character, not in the stop list and not all lowercase.
                if tok['ner'] == 'PERSON' or tok['ner'] == 'LOCATION' or tok[
                        'ner'] == 'ORGANIZATION' or tok['ner'] == 'MISC':
                    if tok["word"] not in search_terms and len(tok["word"]) > 1 and tok["word"] not in fin_terms \
                            and not tok["word"].islower():
                        search_terms.append(tok["word"])
                if tok['ner'] == 'O':
                    others.append(tok["word"])
                # Find emails, NINs and phone numbers
                if templates(tok["word"]):
                    search_terms.append(tok["word"])

        # Second pass over the nlp_sp entities whose label is in in_labels:
        # keep only words that ner_extraction tagged 'O'.
        for label in labels:
            if label in in_labels:
                entities = [
                    cleanup(e.string, lower=False) for e in nlp_result_sp.ents
                    if label == e.label_
                ]
                entities = list(set(entities))
                #print(label, entities)
                for ent in entities:
                    wds_list = re.split(' |\n', ent)
                    for wd in wds_list:
                        if wd not in search_terms and wd not in search_terms_sp and len(
                                wd) > 1 and wd in others and not wd.islower():
                            search_terms_sp.append(wd)

    # Drop stop-list words (also tested without their last character)
    # unless they match one of the templates.
    search_terms1 = []
    for term in search_terms_sp:
        # and term.lower() not in doc_text
        if term not in fin_terms and term[:-1] not in fin_terms:
            search_terms1.append(term)
        else:
            if templates(term):
                search_terms1.append(term)

    #tel = re.search("^(\+44\s?\d{4}|\(?0\d{4}\)?)\s?\d{3}\s?\d{3}$", doc_text)
    #print(tel)

    search_terms = search_terms + search_terms1
    print(search_terms)

    #Save search terms
    #text_file = open(filename[:-4] + 'terms.txt', "w")
    #text_file.write("%s" % search_terms1)
    #text_file.close()

    return search_terms, conv_img_list, gray_list
print(base)
new = '{}.txt'.format(base)
# NOTE(review): time.clock() was removed in Python 3.8. It is left untouched
# because the matching elapsed-time computation is not part of this excerpt;
# both calls should move to time.perf_counter() together.
start_time = time.clock()

# Open the file and read the pdf
# NOTE(review): pdfFileObj is opened but never read in the visible code.
with open(file[1], 'rb') as pdfFileObj, open(processed_dir + '/' + new,
                                             'w',
                                             encoding='utf-8') as text_file:
    imageBlobs = []
    # Render the PDF at 300 dpi and convert it to JPEG. Both wand images are
    # managed with ``with`` so they are released even when a page fails.
    with wi(filename=file[1], resolution=300) as pdfFile:
        with pdfFile.convert('jpeg') as image:
            for img in image.sequence:
                # Flatten each page onto white before encoding it.
                with wi(image=img) as imgPage:
                    imgPage.background_color = Color("white")
                    imgPage.alpha_channel = 'remove'
                    imageBlobs.append(imgPage.make_blob('jpeg'))

    extract = []
    pytesseract.pytesseract.tesseract_cmd = r"C:\Program Files\Tesseract-OCR\tesseract.exe"

    # OCR every page blob in order.
    for imgBlob in imageBlobs:
        image = Image.open(io.BytesIO(imgBlob))
        text = pytesseract.image_to_string(image, lang='eng')
        extract.append(text)

    # One block of text per page, each followed by a newline.
    for item in extract:
        text_file.write("%s\n" % item)
def __init__(self, src_dir, out_dir):
    """Remember the source/output directories and load the white font.

    The font is read from ``font.ttf`` directly under ``src_dir``.
    """
    self.src, self.out = src_dir, out_dir

    font_path = "{}/font.ttf".format(src_dir)
    white = Color("#ffffff")
    self.FONT = Font(path=font_path, color=white)
from wand.image import Image, Color
from PyPDF2 import PdfFileReader, PdfFileWriter
import time
import tempfile
import io

# Copy the first pages (at most 10) of the source PDF into an in-memory PDF.
pdf = io.BytesIO()
with open("./pdf_files/large.pdf", 'rb') as infile:
    reader = PdfFileReader(infile)
    writer = PdfFileWriter()
    # Clamp to the real page count: asking for a page past the end of a
    # shorter document raises instead of converting what is available.
    for page in range(min(10, reader.getNumPages())):
        writer.addPage(reader.getPage(page))
    # Written while the source file is still open, since the pages are read
    # from ``infile``.
    writer.write(pdf)
pdf.seek(0)

# Render every page of the in-memory PDF to its own PNG (1-based file names).
with Image(file=pdf, resolution=120) as source:
    images = source.sequence
    pages = len(images)
    for n, page_image in enumerate(images, start=1):
        newfilename = "./img_files/pdf_page_" + str(n) + '.png'
        with Image(page_image) as img:
            img.format = 'png'
            img.background_color = Color('white')  # Set white background.
            img.alpha_channel = 'remove'
            img.save(filename=newfilename)
'--input_dir', help='path to directory that contains single-page PDFs')
# NOTE(review): args.output_dir is used below; its argument is presumably
# declared above this excerpt - confirm.
args = parser.parse_args()

if __name__ == '__main__':
    # This list collects all the error strings
    # (nothing is appended to it in the code visible here)
    total_errors = []

    # Get the list of files in the input directory
    input_dir_files = [
        f for f in listdir(args.input_dir) if isfile(join(args.input_dir, f))
    ]

    # Get the list of files in the output directory
    output_dir_files = [
        f for f in listdir(args.output_dir)
        if isfile(join(args.output_dir, f))
    ]

    for pdf in input_dir_files:
        # Only files whose name ends with '.pdf' are converted.
        if pdf[-4:] == '.pdf':
            # Name of the PNG that corresponds to this PDF
            pdf_base_name = pdf[:-4] + '.png'

            # Skip PDFs whose PNG is already in the output directory.
            if pdf_base_name not in output_dir_files:
                print pdf_base_name

                # Converting the single page into PNG on a white background
                with Image(filename=join(args.input_dir, pdf),
                           resolution=300) as img:
                    img.format = 'png'
                    img.background_color = Color('white')
                    img.alpha_channel = 'remove'
                    img.save(filename=join(args.output_dir, pdf[:-4] + '.png'))
def ProcessRecord(r, pdfDense=None):
    """Render the source image of record ``r`` into lowres and highres files.

    Returns ``None`` immediately when both destination images already exist.
    Otherwise the work happens in a temporary directory: sources without a
    ``PROJCRS`` projection are written with ``writeImageNoWarp``; projected
    sources are warped to EPSG:3857 with ``gdalwarp``, trimmed and scaled
    with ``gdal.Translate``, reduced to a 16-colour PNG, tagged with the
    corner coordinates and converted to WebP. The results are finally moved
    to the destination directories with ``mv``.

    :param r: record; it is passed to the project helpers ``srcImg``,
        ``destImg``, ``destDir``, ``tmpDest``, ``recordType`` and
        ``runwayID``.
    :param pdfDense: rendering density; highres uses twice this value. A
        falsy value only prints an error, processing continues.
    """
    if not pdfDense:
        print(
            "ERROR: pdfDense must be passed so that it is local to a multiprocessing thread."
        )

    ## Note the destination directories must already exist!

    ## If the image exists, assume it is good and skip it
    if (os.path.isfile(destImg(r, base="lowres"))) and (os.path.isfile(
            destImg(r, base="highres"))):
        return (None)

    ## Work in a temporary directory that gets automagically deleted upon completion
    with tempfile.TemporaryDirectory() as path:
        ## path="/dev/shm"
        srcfile = gdal.Open(srcImg(r))
        if DEBUG: print(srcfile)

        ## Check if the source file has a gdal projection
        ## (noproj is True when "PROJCRS" is absent from the gdal.Info text)
        noproj = gdal.Info(srcfile)
        noproj = (noproj.find("PROJCRS") < 0)

        ## Deal with hotspots and airport diagrams etc. This is everything but plates
        ## if (recordType(r) in ["APD", "DAU", "DP", "HOT", "LAH", "ODP", "STAR"] or noproj):
        if (noproj):
            if DEBUG: print("No projection")
            if recordType(
                    r) == "APD":  ## Airport directories aren't trimmed for now
                trim = False
                pdfDense = 150  ## Temporary until such time as airport directory database can be updated.
            else:
                trim = True  # -trim +repage

            ## highres is written at twice the lowres density
            for p in ["lowres", "highres"]:
                thedense = pdfDense if p == "lowres" else pdfDense * 2
                writeImageNoWarp(srcImg(r),
                                 tmpDest(path, r, p),
                                 resolution=thedense,
                                 trim=trim)
        else:
            highDensityTmp = path + "/highDensityTmp.tif"

            ## Warp the image to high density. Do at least at 2x because high res is at 2x
            commstr = "gdalwarp -r %s -q -dstalpha --config GDAL_PDF_DPI %s -t_srs EPSG:3857 %s %s" % (
                resampling, pdfDense * 2, srcImg(r), highDensityTmp)
            ## A non-zero exit status is only reported; processing continues.
            if (os.system(commstr)):
                print("Failed warping didn't work")
                print(commstr)
            #ds = gdal.Warp(highDensityTmp, srcfile, dstSRS='EPSG:3857',
            #    height=str(size), dstAlpha=True, format="GTiff") # "lanczos", "cubicspline" , resampleAlg="nearestneighbor"
            #ds = None ## This is needed to ensure it is written

            ## ds = gdal.Translate(tmpfile2,gdal.Open(highDensityTmp),resampleAlg="lanczos", srcWin=getTrims(highDensityTmp))
            for p in ["lowres", "highres"]:
                thedest = tmpDest(path, r, p)
                thesize = size if p == "lowres" else size * 2
                ## Crop to the window from getTrims and scale to the target height
                ds = gdal.Translate(thedest,
                                    gdal.Open(highDensityTmp),
                                    resampleAlg=resampling,
                                    srcWin=getTrims(highDensityTmp),
                                    height=thesize,
                                    width=0,
                                    scaleParams=[[]])
                ## Drop the reference to the translated dataset
                ds = None

                ## Get the corner strings for the geotag
                cornerstr = getTagCoordinates(thedest)

                ## Write the png image
                with Image(filename=thedest) as img:
                    #img.sharpen(radius=5.0,sigma=5.0)
                    img.background_color = Color(
                        'white')  # Set white background.
                    img.alpha_channel = 'remove'
                    extension = "png8"
                    img.format = extension
                    img.normalize()
                    img.quantize(16, dither=False)
                    # img.type = 'palette'
                    img.save(filename=thedest + "." + extension)

                ## Write avare geotag into file. Suppress the warning
                ## The steps are chained with &&, so the first failing
                ## command stops the rest of the chain.
                if (extension == "png8"):
                    ## Rename the saved "<dest>.png8" file back to <dest>
                    commstr = ("mv %s %s" %
                               (thedest + "." + extension, thedest))
                else:
                    commstr = "echo -n"
                commstr += ' && optipng -quiet %s' % (thedest)
                commstr += ' && exiftool -overwrite_original_in_place -q -Comment="%s" %s 2> /dev/null ' % (
                    cornerstr, thedest)
                commstr += ' && exiv2 -M"set Exif.Photo.UserComment charset=Ascii %s" %s' % (
                    cornerstr, thedest)
                ## commstr+=' && identify %s' % (thedest)
                commstr += ' && cwebp -quiet -lossless -z 9 -metadata exif %s -o %s' % (
                    thedest, thedest.replace(".png", ".webp"))
                ## commstr+=' && identify %s' % (thedest)
                if (os.system(commstr)):
                    ## NOTE(review): the message has four %s placeholders, so
                    ## r is presumably a 4-item tuple - confirm.
                    print("Failed at exif writing %s %s %s %s" % (r))
                    if DEBUG: print(commstr)

        ## Finally move the resulting file(*) into place
        for p in ["lowres", "highres"]:
            commstr = "mv %s %s" % (tmpDest(path, r,
                                            p), destDir(r, base=p) + "/")
            commstr += " && mv %s %s" % (tmpDest(path, r, p).replace(
                ".png", ".webp"), destDir(r, base=p) + "/")
            if os.system(commstr):
                print("Move failed for " + path + runwayID(r) + "*.png")
                if DEBUG: print(commstr)