Exemple #1
0
def twitter_users():
    """Show list of indexed twitter users

    Reads the `user` column of the `users` table and, when that query
    succeeds, of the `deleted_users` table in live/twitter_scraper.db.
    The template receives `users` as a list of (user, not_deleted) pairs
    sorted by user, together with `user_count` and `page_title`.
    """
    conn = sqlite3.connect('live/twitter_scraper.db')
    try:
        c = conn.cursor()

        # get all users
        c.execute('SELECT user FROM users')
        users = c.fetchall()

        # get deleted users; if the query fails (e.g. the table is missing)
        # nobody is treated as deleted
        try:
            c.execute('SELECT user FROM deleted_users')
            deleted_users = {row[0] for row in c.fetchall()}
        except sqlite3.Error:
            deleted_users = set()

        c.close()
    finally:
        # closing the cursor alone does not release the connection
        conn.close()

    # combine users and deleted users (set lookup keeps this linear)
    users = [(row[0], row[0] not in deleted_users) for row in sorted(users)]

    kwargs = {
        'users': users,
        'user_count': len(users),
        'page_title': 'Indexed Twitter Users',
    }

    return render_page('twitter_users.html', **kwargs)
Exemple #2
0
def dc_app_image(path):
    """Get HQ version of DC app picture

    `path` has to match a file.candlemystar.com/cache/ url whose file name
    ends in `_<width>x<height>.<ext>`; otherwise FullSizeDCAppImage is
    raised (presumably signalling the link is already full size - confirm
    against the exception's definition).

    The rendered page receives the derived full-size link as `image_link`
    and the original `path` as `url`. The link itself is not requested.
    """
    # verify link
    x = re.match(
        r"((http://|https://)?file\.candlemystar\.com/cache/.*(_\d+x\d+)\.\w+$)",
        path)
    if x is None:
        raise FullSizeDCAppImage

    # get full size image: drop the cache directory, the thumbnail prefix
    # and the "_<width>x<height>" text captured by the third group
    image_link = path.replace('cache/', '')
    image_link = image_link.replace('thumb-', '')
    image_link = image_link.replace(x.group(3), '')

    kwargs = {
        'image_link': image_link,
        'url': path,
    }

    return render_page('dc_app_image.html', **kwargs)
Exemple #3
0
def handle_exception(e):
    """Generic http error handler

    When the request's full path is '/' or '/?' the home page is rendered
    instead of an error. Any other request gets error.html, with a message
    built from the exception's `code`, `name` and `description`, and with
    `code` also passed on to render_page.
    """
    # the home page is always served, even when the request errored
    if request.full_path in ('/', '/?'):
        return render_page('sourcecatcher.html')

    print(e)

    message = f'<div class="error_code">{e.code} {e.name}</div><br>{e.description}'
    kwargs = dict(
        embed=None,
        app=False,
        app_direct_image=False,
        results=True,
        error_msg=message,
        page_title='Error',
    )
    return render_page('error.html', code=e.code, **kwargs)
Exemple #4
0
def entity_too_large(e):
    """Error page if uploaded file is too large

    The incoming exception `e` is not inspected; the message shown is the
    string form of a fresh EntityTooLarge instance.
    """
    kwargs = {
        'app': False,
        'app_direct_image': False,
        'results': True,
        'error_msg': str(EntityTooLarge()),
        'page_title': 'Error',
    }
    # 413 is the HTTP status for a too-large request body. The other error
    # paths in this module hand a `code` to render_page, so do the same here
    # (presumably render_page uses it as the response status - confirm).
    return render_page('error.html', code=413, **kwargs)
Exemple #5
0
def find_and_render(location, path):
    """Try to find a matching image and render the results webpage

    On success match_results.html is rendered with whatever image_search()
    returns. On failure error.html is rendered with a `code` of:
      - 200 (unchanged default) for TWError, which also supplies `error_link`
      - 404 for NoMatchesFound, which also supplies `error_reasons`
      - 400 for any other SCError
      - 500 for everything else (traceback is printed)
    When `location` is 'url', `path` is passed to the error page as `url`.
    """
    # context for the error page; each handler below fills in its own part
    kwargs = {
        'error_msg': None,
        'error_reasons': None,
        'error_link': None,
        'warning_msg': None,
        'page_title': 'Error',
        'code': 200,
    }

    try:
        # return error if url is for DC app
        if location == 'url':
            parts = tldextract.extract(path)
            host = (parts.subdomain, parts.domain, parts.suffix)
            if host == ('dreamcatcher', 'candlemystar', 'com'):
                raise SCError('DC App has closed and is no longer supported')

        # clear image_search() cache if the database file's mtime differs
        # from the one last recorded in redis
        mtime_text = str(os.path.getmtime('live/twitter_scraper.db'))
        if redis_db.get("db_mtime") != mtime_text.encode("utf-8"):
            print("clearing image_search() cache")
            image_search_cache.clear()
        redis_db.set("db_mtime", mtime_text)

        # find matching results
        found = image_search(location, path)

        return render_page('match_results.html', **found)

    except TWError as e:
        kwargs['error_msg'] = str(e)
        kwargs['error_link'] = e.link
        print(e)

    except NoMatchesFound as e:
        kwargs.update(error_msg=str(e), error_reasons=e.reasons(), code=404)
        print(e)

    except SCError as e:
        kwargs.update(error_msg=str(e), code=400)
        print(e)

    except Exception as e:
        kwargs['error_msg'] = "An unknown error occurred"
        traceback.print_exc()
        kwargs['code'] = 500
        print(e)

    if location == 'url':
        kwargs['url'] = path

    # did not find any matches
    return render_page('error.html', **kwargs)
Exemple #6
0
def root():
    """Render the home page after removing old requests from the cache"""
    # cutoff handed to remove_old_entries: now minus req_expire_after
    cutoff = datetime.now() - req_expire_after
    cached_req_session.cache.remove_old_entries(cutoff)

    return render_page('sourcecatcher.html')
Exemple #7
0
def _dc_full_size_url(match):
    """Rewrite a matched candlemystar file url into its full-size form

    `match` must come from the url regex in dc_app (named groups `cache`
    and `imgdim`). The cache directory, the 'thumb-' prefix and the
    "_<width>x<height>" text are removed from the matched url.
    """
    url = match.group(0)
    cache_dir = match.group("cache")
    if cache_dir is not None:
        url = url.replace(cache_dir, '')
    url = url.replace('thumb-', '')
    dimensions = match.group("imgdim")
    if dimensions is not None:
        url = url.replace(dimensions, '')
    return url


def dc_app(path):
    """Get HQ pictures from DC app

    Parses the post at `path` and renders dc_app.html with:
      - `app_video` / `app_video_poster` when both lookups succeed
      - otherwise `app_images`: de-duplicated full-size image urls taken
        from the post's 'img-box' divs, in order of first appearance
      - the poster's name, the post text and the profile picture url

    NOTE: if find_video() succeeds but reading the poster attribute fails,
    `app_video` stays set and `app_images` is filled in as well.
    """
    parsed_html = get_parsed_html(path)

    app_images = None
    app_video = None
    app_video_poster = None
    dcapp_id = extract_id(path)

    # match image urls
    regex = r"(?P<protocol>http:\/\/|https:\/\/)file\.candlemystar\.com\/(?P<cache>cache\/)?.*?(?P<imgdim>_\d+x\d+)?\.\w+"

    try:
        # try to find video
        app_video = find_video(parsed_html)
        app_video_poster = parsed_html.body.find('video').attrs['poster']
    except Exception:
        # no usable video: find all images from app post
        images_html = ''.join(
            str(h)
            for h in parsed_html.body.find_all('div',
                                               attrs={'class': 'img-box'})
        )

        # create urls for full-size images and remove duplicates
        app_images = list(OrderedDict.fromkeys(
            _dc_full_size_url(match)
            for match in re.finditer(regex, images_html)
        ))

    # find post username and text
    app_poster = parsed_html.body.find('div', attrs={
        'class': 'card-name'
    }).text.strip()
    app_text = parsed_html.body.find('div', attrs={
        'class': 'card-text'
    }).text.strip()

    # find profile picture
    profile_pic = parsed_html.body.find('div', attrs={
        'class': 'profile-img'
    }).find('img').attrs['src']
    try:
        # best effort: on any failure (e.g. the url does not match the
        # regex) the original src is kept
        profile_pic = _dc_full_size_url(re.match(regex, profile_pic))
    except Exception as e:
        print(f"Error getting full size profile picture {e}")

    kwargs = {
        'dcapp_id': dcapp_id,
        'app_video': app_video,
        'app_video_poster': app_video_poster,
        'app_images': app_images,
        'app_poster': app_poster,
        'app_text': app_text,
        'profile_pic': profile_pic,
        'url': path,
        'page_title': f'DC App #{dcapp_id}',
    }

    return render_page('dc_app.html', **kwargs)