def get_urls(url, selector):
    """Return the normalized image URLs found in the selected part of a page.

    The markup is obtained with ``get_html(url, selector)`` and parsed with
    ``fromstring``. Every ``<img>`` attribute value matched by the XPath
    query is passed through ``normalize_image_url`` together with the page
    URL.

    Args:
        url: Address of the page to inspect.
        selector: Selector forwarded unchanged to ``get_html``.

    Returns:
        A list with one normalized URL per matched attribute, in the order
        the XPath query returned them.
    """
    tree = fromstring(get_html(url, selector))

    # Pages whose URL contains 'weixin.qq.com' are read from ``data-src``
    # instead of ``src``.
    # NOTE(review): presumably because those pages lazy-load images - confirm.
    query = '//img/@data-src' if 'weixin.qq.com' in url else '//img/@src'

    return [normalize_image_url(url, raw_src) for raw_src in tree.xpath(query)]
def get_urls(url, selector):
    """Collect normalized image URLs from the selected fragment of a page.

    Fetches the markup through ``get_html(url, selector)``, parses it with
    ``fromstring`` and gathers an attribute value from every ``<img>``
    element. Each value is run through ``normalize_image_url`` with the page
    URL as its first argument.

    Args:
        url: Address of the page to inspect.
        selector: Selector forwarded unchanged to ``get_html``.

    Returns:
        A list of normalized image URLs, in the order the XPath query
        returned the attribute values.
    """
    document = fromstring(get_html(url, selector))

    # Default to the ``src`` attribute; URLs containing 'weixin.qq.com' use
    # ``data-src`` instead.
    # NOTE(review): presumably a lazy-loading convention on those pages -
    # confirm.
    attribute_query = '//img/@src'
    if 'weixin.qq.com' in url:
        attribute_query = '//img/@data-src'

    normalized = []
    for raw_url in document.xpath(attribute_query):
        normalized.append(normalize_image_url(url, raw_url))
    return normalized
from upload2cos import upload_image
from watermark import watermark_text, watermark_overlay

# Command-line interface: two positional arguments, the page URL and the
# selector that is forwarded to get_html() and get_urls().
parser = argparse.ArgumentParser()
parser.add_argument('url', help='target url page')
parser.add_argument('selector', help='target selector')
args = parser.parse_args()
url, selector = args.url, args.selector

# Fetch the selected HTML, convert it to Markdown and save the result.
html = get_html(url, selector)
md = html2md(html)
save_md(md)

# Gather the image URLs referenced by the selected part of the page.
image_urls = get_urls(url, selector)

# Download every image and upload it to COS, printing the source URL, the
# value returned by download_image() and the value returned by
# upload_image() for each one.
# NOTE(review): the original comment says the URLs are replaced with the COS
# access URL, but nothing visible here rewrites the saved Markdown - confirm
# whether that step lives elsewhere.
for image_url in image_urls:
    print(image_url)
    image_path = download_image(image_url)
    print(image_path)
    new_url = upload_image(image_path)
    print(new_url)