def twitter_users():
    """Show list of indexed twitter users.

    Reads every row of the ``users`` table and, when it can be queried, the
    ``deleted_users`` table of ``live/twitter_scraper.db``.

    Returns:
        The result of rendering ``twitter_users.html`` with:
        ``users`` -- list of ``(user, flag)`` tuples sorted by user, where
        ``flag`` is True when the user is NOT listed in ``deleted_users``;
        ``user_count`` -- number of entries in ``users``;
        ``page_title`` -- fixed page title.
    """
    conn = sqlite3.connect('live/twitter_scraper.db')
    try:
        c = conn.cursor()

        # get all users
        c.execute('SELECT user FROM users')
        users = c.fetchall()

        # get deleted users; this lookup is best-effort, so a database error
        # here is treated as "no deleted users" rather than failing the page
        try:
            c.execute('SELECT user FROM deleted_users')
            deleted_users = {row[0] for row in c.fetchall()}
        except sqlite3.Error:
            deleted_users = set()
        c.close()
    finally:
        # closing the cursor does not release the connection itself
        conn.close()

    # combine users and deleted users (set membership keeps this linear)
    users = [(tup[0], tup[0] not in deleted_users) for tup in sorted(users)]

    kwargs = {
        'users': users,
        'user_count': len(users),
        'page_title': 'Indexed Twitter Users',
    }
    return render_page('twitter_users.html', **kwargs)
def dc_app_image(path):
    """Get HQ version of DC app picture.

    Args:
        path: link to an image under ``file.candlemystar.com/cache/`` whose
            filename ends in a ``_<width>x<height>`` suffix before the
            extension.

    Returns:
        The result of rendering ``dc_app_image.html`` with ``image_link``
        (the rewritten link) and ``url`` (the original ``path``).

    Raises:
        FullSizeDCAppImage: if ``path`` does not match the cached-image
            link pattern.
    """
    # verify link
    thumb = re.match(
        r"((http://|https://)?file\.candlemystar\.com/cache/.*(_\d+x\d+)\.\w+$)",
        path)
    if thumb is None:
        raise FullSizeDCAppImage

    # get full size image: drop the cache directory, the thumbnail prefix
    # and the "_<width>x<height>" suffix captured by the third group
    image_link = path.replace('cache/', '')
    image_link = image_link.replace('thumb-', '')
    image_link = image_link.replace(thumb.group(3), '')

    kwargs = {
        'image_link': image_link,
        'url': path,
    }
    return render_page('dc_app_image.html', **kwargs)
def handle_exception(e):
    """Generic http error handler.

    Requests whose full path is ``/`` or ``/?`` are answered with the main
    page. Any other request gets ``error.html`` showing the error's code,
    name and description.

    Args:
        e: the error being handled; its ``code``, ``name`` and
            ``description`` attributes are read.

    Returns:
        The result of ``render_page`` for the chosen template.
    """
    if request.full_path in ('/', '/?'):
        return render_page('sourcecatcher.html')

    print(e)

    error_msg = f'<div class="error_code">{e.code} {e.name}</div><br>{e.description}'
    context = dict(
        code=e.code,
        embed=None,
        app=False,
        app_direct_image=False,
        results=True,
        error_msg=error_msg,
        page_title='Error',
    )
    return render_page('error.html', **context)
def entity_too_large(e):
    """Error page if uploaded file is too large.

    Args:
        e: the triggering error; it is not inspected.

    Returns:
        The result of rendering ``error.html`` with the message text of a
        fresh ``EntityTooLarge`` instance.
    """
    message = str(EntityTooLarge())
    context = dict(
        app=False,
        app_direct_image=False,
        results=True,
        error_msg=message,
        page_title='Error',
    )
    return render_page('error.html', **context)
def find_and_render(location, path):
    """Try to find a matching image and render the results webpage.

    Args:
        location: kind of input; the value ``'url'`` enables the DC App
            link check and adds ``url`` to the error page context.
        path: the image location handed to ``image_search``.

    Returns:
        The rendered ``match_results.html`` page when the search succeeds,
        otherwise the rendered ``error.html`` page. The error page's
        ``code`` is 404 for ``NoMatchesFound``, 400 for ``SCError``, 500 for
        any other exception, and stays 200 for ``TWError``.
    """
    # context for the error page; each handler below fills in its details
    failure = {
        'error_msg': None,
        'error_reasons': None,
        'error_link': None,
        'warning_msg': None,
        'page_title': 'Error',
        'code': 200,
    }

    try:
        # return error if url is for DC app
        if location == 'url':
            parts = tldextract.extract(path)
            host = (parts.subdomain, parts.domain, parts.suffix)
            if host == ('dreamcatcher', 'candlemystar', 'com'):
                raise SCError('DC App has closed and is no longer supported')

        # clear image_search() lru cache if database was updated
        stamp = str(os.path.getmtime('live/twitter_scraper.db'))
        if redis_db.get("db_mtime") != stamp.encode("utf-8"):
            print("clearing image_search() cache")
            image_search_cache.clear()
            redis_db.set("db_mtime", stamp)

        # find matching results
        return render_page('match_results.html', **image_search(location, path))

    except TWError as err:
        failure['error_msg'] = str(err)
        failure['error_link'] = err.link
        print(err)
    except NoMatchesFound as err:
        failure['error_msg'] = str(err)
        failure['error_reasons'] = err.reasons()
        failure['code'] = 404
        print(err)
    except SCError as err:
        failure['error_msg'] = str(err)
        failure['code'] = 400
        print(err)
    except Exception as err:
        failure['error_msg'] = "An unknown error occurred"
        traceback.print_exc()
        failure['code'] = 500
        print(err)

    if location == 'url':
        failure['url'] = path

    # did not find any matches
    return render_page('error.html', **failure)
def root():
    """Render the main page after dropping old cached requests.

    Returns:
        The result of rendering ``sourcecatcher.html``.
    """
    # remove old requests from cache
    cutoff = datetime.now() - req_expire_after
    cached_req_session.cache.remove_old_entries(cutoff)

    return render_page('sourcecatcher.html')
def _full_size_url(match):
    """Return the matched file URL rewritten to point at the full-size image.

    Args:
        match: a regex match exposing the named groups ``cache`` and
            ``imgdim``; either group may be unset.

    Returns:
        The matched text without the ``cache/`` path segment and ``thumb-``
        prefix (when ``cache`` matched) and without the
        ``_<width>x<height>`` suffix (when ``imgdim`` matched).
    """
    url = match.group(0)

    cache_part = match.group("cache")
    if cache_part is not None:
        # presumably only cached files carry the "thumb-" prefix
        url = url.replace(cache_part, '')
        url = url.replace('thumb-', '')

    size_part = match.group("imgdim")
    if size_part is not None:
        url = url.replace(size_part, '')

    return url


def dc_app(path):
    """Get HQ pictures from DC app.

    Args:
        path: link to the DC app post; passed to ``get_parsed_html`` and
            ``extract_id``.

    Returns:
        The result of rendering ``dc_app.html`` with the post id, the video
        and its poster (when a video was found), the list of full-size
        image URLs (when no video was found), the poster's name, the post
        text, the profile picture URL and the original ``path``.
    """
    parsed_html = get_parsed_html(path)

    app_images = None
    app_video = None
    app_video_poster = None
    dcapp_id = extract_id(path)

    # match image urls
    regex = r"(?P<protocol>http:\/\/|https:\/\/)file\.candlemystar\.com\/(?P<cache>cache\/)?.*?(?P<imgdim>_\d+x\d+)?\.\w+"

    try:
        # try to find video
        app_video = find_video(parsed_html)
        app_video_poster = parsed_html.body.find('video').attrs['poster']
    except Exception:
        # any failure while looking for a video means the post is treated as
        # an image post: find all images from app post
        images_html = ''.join(
            str(box)
            for box in parsed_html.body.find_all('div', attrs={'class': 'img-box'})
        )

        # create urls for full-size images
        files = [_full_size_url(m) for m in re.finditer(regex, images_html)]

        # remove duplicates while keeping first-seen order
        app_images = list(OrderedDict.fromkeys(files))

    # find post username and text
    app_poster = parsed_html.body.find('div', attrs={
        'class': 'card-name'
    }).text.strip()
    app_text = parsed_html.body.find('div', attrs={
        'class': 'card-text'
    }).text.strip()

    # find profile picture
    profile_pic = parsed_html.body.find('div', attrs={
        'class': 'profile-img'
    }).find('img').attrs['src']

    # best-effort upgrade to the full-size picture; on any failure (e.g. the
    # src does not match the pattern) the original src is kept
    try:
        profile_pic = _full_size_url(re.match(regex, profile_pic))
    except Exception as e:
        print(f"Error getting full size profile picture {e}")

    kwargs = {
        'dcapp_id': dcapp_id,
        'app_video': app_video,
        'app_video_poster': app_video_poster,
        'app_images': app_images,
        'app_poster': app_poster,
        'app_text': app_text,
        'profile_pic': profile_pic,
        'url': path,
        'page_title': f'DC App #{dcapp_id}',
    }
    return render_page('dc_app.html', **kwargs)