def thumb(m):
    # re.sub() callback: m must provide the named groups 'url' (full image)
    # and 'th' (existing thumbnail).
    d = m.groupdict()
    url = d['url']
    old_th = d['th']
    code_origin = m.group()
    code_normal = '[url={0}][img]{1}[/img][/url]'
    tname = 't' + hashurl(url) + '.jpg'
    # Reuse a previously generated thumbnail if we have one cached.
    th = rehost_m.cache_search(tname)
    if th is not None:
        print('. {0} - from cache'.format(th))
        return code_normal.format(url, th)
    try:
        i = Image.open(open_thing(url)[0])
        if old_th != url:
            # Keep the existing thumbnail if its aspect ratio matches the
            # full image (within 2%) and it is at least 180px wide.
            t = Image.open(open_thing(old_th)[0])
            f1 = float(i.size[1]) / i.size[0]
            f2 = float(t.size[1]) / t.size[0]
            if abs(f1 - f2) / (f1 + f2) < 0.02 and t.size[0] >= 180:
                print('. {0} - good'.format(old_th))
                rehost_m.cache_write(tname, old_th)
                return code_origin
        i.thumbnail(THUMB_SIZE, Image.ANTIALIAS)
        i.save(tname, quality=85)
    except IOError as ex:
        print(ex)
        return code_origin
    th = rehost(tname, force_cache=True)
    try:
        os.unlink(tname)
    except OSError:
        pass
    print('. {0} - new'.format(th))
    return code_normal.format(url, th)
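# A minimal usage sketch (hypothetical, not part of the original script):
# thumb() is an re.sub() callback, so whatever pattern it is paired with must
# define the named groups 'url' and 'th' that thumb() reads via m.groupdict().
# The BBCode pattern and helper name below are assumptions for illustration.
def _demo_thumb_rewrite(text):
    # Matches [url=<full image>][img]<thumbnail>[/img][/url] blocks and
    # hands each match to thumb(), which returns the replacement BBCode.
    thumb_re = re.compile(
        r'\[url=(?P<url>[^\]]+)\]\[img\](?P<th>[^\[\]]+)\[/img\]\[/url\]',
        re.IGNORECASE)
    return thumb_re.sub(thumb, text)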
def process(s):
    global urls
    urls = {}
    # Hackity hack: drop everything from the attachment block onward,
    # then cut off the trailing partial tag.
    s = s.split('class="attach')[0].split('<')
    s.pop()
    s = '<'.join(s)
    # Cut out bad tags.
    for t in SKIP_TAGS:
        s = re.sub(FLAGS + r'\s*<(?P<tag>' + t + r').*?</(?P=tag)>\s*', '', s)
    # Apply simple rules.
    for (k, r) in SIMPLE_RULES:
        s = re.sub(FLAGS + k, r, s)
    # Close tags that should be closed; leave already-closed ones as-is.
    for t in CLOSED_TAGS:
        s = re.sub(FLAGS + r'<({0}[^>]*?)/?>'.format(t), r'<\1/>', s)
        # Maybe this is overkill, but why not.
        s = s.replace('</{0}>'.format(t), '')
    # Apply complex rules.
    (s, n) = ntag_re.subn(proctag, s)
    m, n = n, 1
    while n > 0:
        (s, n) = ptag_re.subn(proctag, s)
        m += n
    # Strip out any HTML leftovers.
    s = re.sub('<[^>]+>', '', s)
    if m > 0:
        print('Replaced {0} tags'.format(m))
    if not args.no_rehost and len(urls) > 0:
        def print_urls(a, b):
            if a != b:
                print('{0} >> {1}'.format(a, b))
        print('Processing {0} URLs...'.format(len(urls)))
        # Rehost images.
        if gevent:
            pool = Pool(POOL_SIZE)
            def fin(h, url):
                # Bind h and url per iteration; a plain closure would see
                # only the loop's final values.
                def f(g):
                    urls[h] = g.value
                    print_urls(url, g.value)
                return f
            for h, url in urls.iteritems():
                j = pool.spawn(rehost, url, image=True, referer=target_root)
                j.link_value(fin(h, url))
            pool.join()
        else:
            for h, url in urls.iteritems():
                new_url = rehost(url, image=True, referer=target_root)
                urls[h] = new_url
                print_urls(url, new_url)
    # Put the (possibly rehosted) URLs back in place of their placeholders.
    imgs = 0
    for p, url in urls.iteritems():
        if hashurl(url) != p:
            imgs += 1
            s = s.replace(p, urls[p])
    if imgs > 0:
        print('Found and replaced {0} images'.format(imgs))
    return decode_html_entities(s).strip()
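# A minimal usage sketch (hypothetical, not part of the original script):
# feed process() a forum post's HTML and get BBCode back. The file handling
# and helper name here are assumptions; the real script drives process()
# from its own fetch/CLI code.
def _demo_process_file(path):
    with open(path) as fh:
        html = fh.read()
    # Converts HTML to BBCode and, unless --no-rehost was passed, rehosts
    # any image URLs collected along the way.
    return process(html)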