def replace_fn_no_caption_tag(find_str: str) -> str:
    """Turn a matched image snippet into a Hugo ``figure`` shortcode.

    The caption is sliced from ``find_str`` starting one character after the
    position reported by ``power_edit.find_nth_match(find_str, '[', 2)``
    (presumably the second ``[`` -- confirm against ``power_edit``) and ending
    at the next ``]``. The URL starts two characters after that ``]`` and ends
    at the following ``)``.

    Each intermediate value is printed for debugging, and the module-level
    ``num_replacements`` counter is incremented once per call.

    Args:
        find_str: The matched text to convert.

    Returns:
        The shortcode text with the extracted URL and caption filled in.
    """
    global num_replacements

    print(find_str)

    # Locate the caption delimiters.
    open_idx = power_edit.find_nth_match(find_str, '[', 2) + 1
    print(f'caption_start_pos = {open_idx}')
    close_idx = power_edit.strict_find(find_str, ']', open_idx)
    print(f'caption_stop_pos = {close_idx}')

    # Double quotes would terminate the shortcode's caption="..." attribute,
    # so they are swapped for single quotes.
    caption_text = find_str[open_idx:close_idx].replace('"', "'")
    print(f'caption = {caption_text}')

    # The URL begins two characters past the caption's closing bracket.
    url_end = power_edit.strict_find(find_str, ')', close_idx)
    link = find_str[close_idx + 2:url_end]
    print(f'url = {link}')

    # Substitute the values into the shortcode template (URL first, then
    # caption, matching the placeholder order).
    shortcode = (
        '{{< figure src="<URL>" caption="<CAPTION>" caption-position="bottom" >}}'
        .replace('<URL>', link)
        .replace('<CAPTION>', caption_text)
    )

    num_replacements += 1
    return shortcode
def replace_fn_single_pic(find_str: str, file_path: str) -> str:
    """Convert a single-picture tag into a Hugo ``figure`` shortcode.

    Steps performed:

    1. Parse the image id (text after ``id=``) and the optional width (text
       after ``w=``) out of ``find_str``. Both values end at the next space.
    2. Derive the page URL on ``http://blog.mbedded.ninja/`` from the part of
       ``file_path`` that follows ``content\\pages\\``.
    3. Load the page HTML from the on-disk cache (``data/html`` next to this
       script), downloading and caching it when it is not there yet.
    4. Find the element whose ``data-image-id`` equals the parsed id, and read
       its ``href`` (image URL) and ``data-description`` (caption).
    5. Download the image into ``data/img`` (if missing) and copy it into the
       Hugo ``static/images`` tree (if missing).
    6. Build and return the shortcode text, and increment the module-level
       ``num_replacements`` counter.

    Progress and intermediate values are printed throughout.

    Args:
        find_str: The matched tag text containing ``id=`` and optionally
            ``w=``.
        file_path: Path of the content file being processed. It must contain
            ``content\\pages\\``.

    Returns:
        The Hugo shortcode that replaces ``find_str``.

    Raises:
        RuntimeError: Presumably raised by ``power_edit.strict_find`` when
            ``id=``, its terminating space, or ``content\\pages\\`` cannot be
            found (the width lookup treats ``RuntimeError`` as "no width").
        AssertionError: If no element with the requested ``data-image-id``
            exists in the page HTML.
        FileNotFoundError: If the image has to be copied into Hugo but could
            not be downloaded.
    """
    global num_replacements

    print(f'find_str = {find_str}, file_path = {file_path}')

    # Image id: the text between 'id=' and the next space.
    id_marker = 'id='
    id_start_pos = power_edit.strict_find(find_str, id_marker, 0) + len(id_marker)
    print(f'id_start_pos = {id_start_pos}')
    id_stop_pos = power_edit.strict_find(find_str, ' ', id_start_pos)
    print(f'id_stop_pos = {id_stop_pos}')
    image_id = find_str[id_start_pos:id_stop_pos]
    print(f'id = {image_id}')

    # Calculate width (optional: a failed lookup means "no width given").
    width_marker = 'w='
    try:
        width_start_pos = (
            power_edit.strict_find(find_str, width_marker, 0) + len(width_marker)
        )
        width_stop_pos = power_edit.strict_find(find_str, ' ', width_start_pos)
    except RuntimeError:
        img_width = None
    else:
        img_width = find_str[width_start_pos:width_stop_pos]
    print(f'img_width = {img_width}')

    # Calculate the URL
    pages_marker = 'content\\pages\\'
    url_frag_start_pos = (
        power_edit.strict_find(file_path, pages_marker, 0) + len(pages_marker)
    )
    print(f'url_frag_start_pos = {url_frag_start_pos}')
    # Drops the final 10 characters of the path -- presumably the trailing
    # file name; TODO confirm against the content tree layout.
    url_frag = file_path[url_frag_start_pos:-10]
    print(f'url_frag = {url_frag}')
    full_url = 'http://blog.mbedded.ninja/' + url_frag.replace('\\', '/')
    print(f'full_url = {full_url}')

    #================================
    # CHECK IF HTML ALREADY SAVED
    #================================
    script_dir = os.path.dirname(os.path.abspath(__file__))
    html_dir = os.path.join(script_dir, 'data', 'html')
    # Make path safe: strip the characters of the URL that are not usable in
    # a file name.
    cache_name = full_url.replace('/', '').replace(':', '') + '.html'
    html_cache_path = os.path.join(html_dir, cache_name)
    print(f'file_path = {html_cache_path}')
    if os.path.isfile(html_cache_path):
        print('HTML already downloaded.')
        with open(html_cache_path, 'r', encoding='utf-8') as the_file:
            html = the_file.read()
    else:
        print('HTML not downloaded, so downloading...')
        response = requests.get(full_url)
        html = response.content.decode('utf-8')
        if response.ok:
            # The cache directory may not exist on a fresh checkout.
            os.makedirs(html_dir, exist_ok=True)
            with open(html_cache_path, 'w+', encoding='utf-8') as the_file:
                the_file.write(html)
        else:
            # Never cache an error page: it would be re-read on every later
            # run and hide the fact that the real page was never fetched.
            print(
                f'WARNING: got HTTP status {response.status_code} for '
                f'{full_url}, not caching the response.'
            )

    #===================================
    # EXTRACT HTML ELEMENTS
    #===================================
    parser = AdvancedHTMLParser.AdvancedHTMLParser()
    parser.parseStr(html)
    elements = parser.getElementsByAttr('data-image-id', image_id)
    print(f'elements = {elements}')
    # We could find the same image multiple times on the page, this is o.k.
    # Raised explicitly (rather than via `assert`) so the check still runs
    # under `python -O`.
    if len(elements) < 1:
        raise AssertionError(
            f'No element with data-image-id="{image_id}" found at {full_url}'
        )
    image_url = elements[0].href
    caption = elements[0].getAttribute('data-description')
    if caption is not None:
        # Remove any " from caption so it cannot terminate the shortcode's
        # caption="..." attribute. Guarded because the attribute may be
        # absent, which is handled when the shortcode is built below.
        caption = caption.replace('"', "'")
    print(f'caption = {caption}')

    #===================================
    # IMAGE FINDING/DOWNLOADING
    #===================================
    img_file_name = image_url.split('/')[-1]
    print(f'img_file_name = {img_file_name}')
    # Drops the first 45 characters of the image URL -- presumably the fixed
    # scheme/host/upload-directory prefix; TODO confirm.
    img_sec_path = image_url[45:]
    print(f'img_sec_path = {img_sec_path}')
    img_base_dir = os.path.join(script_dir, 'data', 'img')
    print(f'img_base_dir = {img_base_dir}')
    img_path = os.path.join(img_base_dir, img_sec_path.replace('/', '\\'))
    print(f'img_path = {img_path}')
    if os.path.isfile(img_path):
        print(f'Image alredy downloaded')
    else:
        print(f'Image not downloaded, downloading...')
        os.makedirs(os.path.dirname(img_path), exist_ok=True)
        # The context manager releases the streamed connection when done.
        with requests.get(image_url, stream=True) as r:
            if r.status_code == 200:
                with open(img_path, 'wb') as f:
                    # Have the raw stream undo any gzip/deflate transfer
                    # encoding so the bytes written are the image itself.
                    r.raw.decode_content = True
                    shutil.copyfileobj(r.raw, f)
            else:
                print(
                    f'WARNING: image download failed with HTTP status '
                    f'{r.status_code} for {image_url}'
                )

    #=============================
    # COPY TO HUGO IF NOT ALREADY THERE
    #=============================
    hugo_image_base_path = 'C:\\Users\\gbmhu\\code\\Hugo\\quickstart\\static\\images\\'
    hugo_image_path = os.path.join(
        hugo_image_base_path, img_sec_path.replace('/', '\\'))
    if os.path.isfile(hugo_image_path):
        print(f'Image already exists in Hugo, not copying.')
    else:
        if not os.path.isfile(img_path):
            # Same exception type shutil.copyfile would raise, but with a
            # message that points at the real cause.
            raise FileNotFoundError(
                f'Cannot copy image into Hugo: {img_path} does not exist '
                f'(download of {image_url} failed).'
            )
        print('Copying image to hugo site.')
        os.makedirs(os.path.dirname(hugo_image_path), exist_ok=True)
        shutil.copyfile(img_path, hugo_image_path)

    #===========================
    # CREATE REPLACEMENT TEXT
    #===========================
    hugo_rel_img_path = '/images/' + img_sec_path
    print(f'hugo_rel_img_path = {hugo_rel_img_path}')
    template = '{{< figure <URL> <CAPTION> caption-position="bottom" <WIDTH> >}}'
    template = template.replace('<URL>', 'src="' + hugo_rel_img_path + '"')
    if caption is not None:
        template = template.replace('<CAPTION>', 'caption="' + caption + '"')
    else:
        template = template.replace('<CAPTION>', '')
    if img_width is None:
        template = template.replace('<WIDTH>', '')
    else:
        template = template.replace('<WIDTH>', 'width="' + img_width + 'px"')
    print(f'replacment_text = {template}')

    num_replacements += 1
    return template