Example #1
0
def check_url_status(item):
    """Refresh the stored HTTP status of ``item`` from its URL.

    The status returned by ``get_url_status_code(item.url)`` is compared
    with ``item.http_status``. When they differ, the new status is saved
    through ``update_item`` and, if ``item`` is an ``Indexed`` instance,
    ``item.reindex()`` is called. Nothing is done when the status is
    unchanged.
    """
    current_status = get_url_status_code(item.url)
    if item.http_status == current_status:
        # Stored value is already up to date.
        return

    update_item(item, http_status=current_status)
    if isinstance(item, Indexed):
        item.reindex()
Example #2
0
def update_screenshot(item, force=False):
    """Fetch a screenshot of ``item.url`` from url2png and store it on ``item``.

    The function returns ``None`` in every case and does nothing (apart from
    logging) when:

    * ``settings.URL2PNG_KEY`` or ``settings.URL2PNG_SECRET`` is missing;
    * the item's URL is empty (or ``None``);
    * ``item.http_status`` is not 200;
    * the item already has a screenshot and ``force`` is false.

    Otherwise any existing screenshot is deleted, the image is downloaded
    and written below ``settings.MEDIA_ROOT``, and ``item.screenshot`` is
    set to the new relative file name via ``update_item``. A failure while
    downloading is logged and leaves the item without a screenshot.

    :param item: object exposing ``url``, ``http_status``, ``screenshot``,
        ``id`` and ``_meta.object_name``.
    :param force: when true, replace a screenshot that already exists.
    """
    api_key = getattr(settings, "URL2PNG_KEY", None)
    api_secret = getattr(settings, "URL2PNG_SECRET", None)
    if not api_key or not api_secret:
        screenshot_logger.warning(u"Either URL2PNG_KEY or URL2PNG_SECRET is missing. Can't fetch screenshot.")
        return

    # Tolerate a missing URL instead of failing on ``None.strip()``.
    url = (item.url or u"").strip()
    if not url:
        return
    # urllib.quote() raises KeyError for unicode input with non-ASCII
    # characters, so encode to a byte string before quoting.
    url = urllib.quote(smart_str(url), safe="%/:=&?~#+!$,;'@()*[]")

    if item.http_status != 200:
        screenshot_logger.warning(u"Skipping '%s'. HTTP status is not 200." % item.url)
        return

    if item.screenshot and not force:
        screenshot_logger.warning(u"Skipping '%s'. Screenshot exists already." % item.url)
        return

    url_hash = md5_constructor(smart_str(url)).hexdigest()
    filename = "%s-%i-%s.png" % (item._meta.object_name.lower(), item.id, url_hash)
    filename = os.path.join(item.screenshot.field.get_directory_name(), filename)

    # Remove existing screenshot
    if item.screenshot:
        try:
            delete(item.screenshot)
        except OSError:
            # Best effort: the file may already be gone from disk.
            pass
        update_item(item, screenshot=None)

    full_path = os.path.join(settings.MEDIA_ROOT, filename)
    dirname = os.path.dirname(full_path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bounds = "1000x1000"
    token = md5_constructor("%s+%s" % (api_secret, url)).hexdigest()
    screenshot_url = "http://api.url2png.com/v3/%s/%s/%s/%s" % (api_key, token, bounds, url)

    # Download the whole image before touching the file system so that a
    # failed or interrupted transfer never leaves a truncated file behind.
    try:
        response = urllib2.urlopen(screenshot_url, timeout=120)
        try:
            image_data = response.read()
        finally:
            response.close()
    except Exception:
        # ``Exception`` rather than a bare ``except`` so KeyboardInterrupt
        # and SystemExit still propagate.
        exc_type, exc_value = sys.exc_info()[:2]
        screenshot_logger.error(u"Failed to fetch screenshot for '%s': %s %s" % (url, exc_type, exc_value))
        return

    # PNG data is binary: write in binary mode and always close the file.
    with open(full_path, "wb") as image_file:
        image_file.write(image_data)
    update_item(item, screenshot=filename)
    screenshot_logger.info(u"Fetched screenshot for '%s'" % url)
Example #3
0
    def handle(self, **options):

        from materials.models import CommunityItem
        from materials.models.course import Course
        from materials.models.library import Library
        from materials.tasks import check_url_status, update_screenshot
        from utils import update_item

        models = [Course, Library, CommunityItem]

        now = datetime.datetime.now()

        items = []

        # Get items that were never fetched
        for model in models:
            qs = model.objects.filter(url_fetched_on=None)
            if qs.exists():
                items += list(qs)
                qs.update(url_fetched_on=now)

        if not items:
            # Get the item with the oldest fetch date
            item = None
            for model in models:
                qs =  model.objects.exclude(url_fetched_on=None).order_by("url_fetched_on")
                if not qs.exists():
                    continue
                candidate = qs[0]
                if item is None or candidate.url_fetched_on < item.url_fetched_on:
                    item = candidate

            if item is not None:
                update_item(item, url_fetched_on=now)
                items.append(item)

        if not items:
            print "No items to fetch."
            return

        for item in items:
            print "Fetching URL for ", unicode(item)
            check_url_status(item)
            update_screenshot(item)

        print "Done!"
Example #4
0
def update_item_request(item_id):
    '''
    The endpoint to handle a request to update an item

    Reads ``name``, ``description``, ``image_url`` and (optionally)
    ``category_ids`` from the submitted form and passes them to
    ``update_item``. ``category_ids`` is passed as ``None`` when the field
    is absent from the form.

    Returns the response produced by ``assert_user_is_authenticated`` when
    it yields one; otherwise flashes a confirmation message and redirects
    to the item's page.
    '''
    res = assert_user_is_authenticated()
    if res:
        return res

    form = request.form
    category_ids = form.getlist('category_ids') if 'category_ids' in form else None

    db.connect()
    try:
        item = update_item(db.session,
                           item_id=item_id,
                           name=form.get('name'),
                           description=form.get('description'),
                           image_url=form.get('image_url'),
                           category_ids=category_ids)
    finally:
        # Always release the connection, even when the update raises.
        db.disconnect()

    flash('Item "{}" edited!'.format(item.name))

    # Redirect to item page
    return redirect(url_for('item_page', item_id=item.id))
Example #5
0
def update_screenshot(item):
    """Render a screenshot of ``item.url`` with the configured external tool.

    Nothing happens when the item's URL is empty (or ``None``). When
    ``item.http_status`` is not 200, any existing screenshot is deleted and
    cleared and the function returns. Otherwise the existing screenshot is
    removed, the command built from ``settings.WEBKIT2PNG_EXECUTABLE`` is
    run, and ``item.screenshot`` is set to the new file name if the output
    file exists afterwards.

    :param item: object exposing ``url``, ``http_status``, ``screenshot``,
        ``id`` and ``_meta.object_name``.
    """
    # Tolerate a missing URL instead of failing on ``None.strip()``.
    url = (item.url or u"").strip()
    if not url:
        return
    # urllib.quote() raises KeyError for unicode input with non-ASCII
    # characters, so encode to a byte string before quoting.
    url = urllib.quote(smart_str(url), safe="%/:=&?~#+!$,;'@()*[]")

    if item.http_status != 200:
        if item.screenshot:
            try:
                delete(item.screenshot)
            except OSError:
                # Best effort, same as the removal further down: the file
                # may already be gone from disk.
                pass
            update_item(item, screenshot=None)
        return

    url_hash = md5_constructor(smart_str(url)).hexdigest()
    filename = "%s-%i-%s.png" % (item._meta.object_name.lower(), item.id, url_hash)
    filename = os.path.join(item.screenshot.field.get_directory_name(), filename)

    # Remove existing screenshot
    if item.screenshot:
        try:
            delete(item.screenshot)
        except OSError:
            pass
        update_item(item, screenshot=None)

    full_path = os.path.join(settings.MEDIA_ROOT, filename)
    dirname = os.path.dirname(full_path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    width = 1024
    height = 768

    # The URL is wrapped in double quotes so shlex.split() keeps it as a
    # single argument of the command line template.
    executable = settings.WEBKIT2PNG_EXECUTABLE % dict(
        filename=full_path, url='"%s"' % url.replace('"', '\\"'), width=width, height=height
    )
    if isinstance(executable, unicode):
        executable = executable.encode(sys.getfilesystemencoding())
    args = shlex.split(executable)

    # NOTE(review): the ``else`` branch below suggests ``timeout`` returns a
    # falsy value when the time limit is hit -- confirm against the decorator.
    @timeout(timeout_duration=60 * 2)
    def fetch_screenshot(args):
        p = subprocess.Popen(args)
        try:
            p.wait()
            return 1
        except TimeoutError:
            p.kill()
            raise
        finally:
            # Make sure no child process is left behind; terminating an
            # already finished process may raise OSError, which is harmless.
            try:
                p.terminate()
            except OSError:
                pass

    result = fetch_screenshot(args)
    if result:
        if os.path.exists(full_path):
            update_item(item, screenshot=filename)
    else:
        update_item(item, screenshot=None)
Example #6
0
def check_url_status(item):
    """Refresh the stored HTTP status of ``item`` from its URL.

    The status returned by ``get_url_status_code(item.url)`` is compared
    with ``item.http_status``. When they differ, the new status is saved
    through ``update_item`` and ``reindex(item)`` is called. Nothing is
    done when the status is unchanged.
    """
    current_status = get_url_status_code(item.url)
    if item.http_status == current_status:
        # Stored value is already up to date.
        return

    update_item(item, http_status=current_status)
    reindex(item)