def check_url_status(item):
    """Refresh the stored HTTP status of ``item`` from its URL.

    The status code for ``item.url`` is looked up with
    ``get_url_status_code``.  Nothing is written when it matches
    ``item.http_status``; otherwise the new code is saved through
    ``update_item`` and, for ``Indexed`` instances, the item is reindexed.
    """
    fetched_status = get_url_status_code(item.url)

    # Unchanged status: no write and no reindex needed.
    if item.http_status == fetched_status:
        return

    update_item(item, http_status=fetched_status)
    if isinstance(item, Indexed):
        item.reindex()
def update_screenshot(item, force=False):
    """Fetch a screenshot of ``item.url`` through the url2png API and store it.

    The function returns early (after logging a warning where applicable)
    when the ``URL2PNG_KEY`` / ``URL2PNG_SECRET`` settings are missing, when
    the URL is empty, when ``item.http_status`` is not 200, or when the item
    already has a screenshot and ``force`` is false.

    Otherwise any existing screenshot is removed, the image is downloaded
    into ``settings.MEDIA_ROOT`` and the item's ``screenshot`` field is
    pointed at the new file via ``update_item``.

    :param item: object exposing ``url``, ``http_status``, ``screenshot``,
        ``id`` and ``_meta.object_name``.
    :param force: when true, replace a screenshot that already exists.
    :returns: ``None`` in every case.
    """
    api_key = getattr(settings, "URL2PNG_KEY", None)
    api_secret = getattr(settings, "URL2PNG_SECRET", None)
    if not api_key or not api_secret:
        screenshot_logger.warning(u"Either URL2PNG_KEY or URL2PNG_SECRET is missing. Can't fetch screenshot.")
        return

    # Tolerate a missing URL, and encode to a byte string before quoting:
    # Python 2's urllib.quote raises KeyError on unicode input that contains
    # non-ASCII characters.
    url = (item.url or "").strip()
    url = urllib.quote(smart_str(url), safe="%/:=&?~#+!$,;'@()*[]")
    if not url:
        return

    if item.http_status != 200:
        screenshot_logger.warning(u"Skipping '%s'. HTTP status is not 200." % item.url)
        return

    if item.screenshot and not force:
        screenshot_logger.warning(u"Skipping '%s'. Screenshot exists already." % item.url)
        return

    url_hash = md5_constructor(smart_str(url)).hexdigest()
    filename = "%s-%i-%s.png" % (item._meta.object_name.lower(), item.id, url_hash)
    filename = os.path.join(item.screenshot.field.get_directory_name(), filename)

    # Remove existing screenshot
    if item.screenshot:
        try:
            delete(item.screenshot)
        except OSError:
            # Best effort: the file may already be gone from disk.
            pass
        update_item(item, screenshot=None)

    full_path = os.path.join(settings.MEDIA_ROOT, filename)
    dirname = os.path.dirname(full_path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    bounds = "1000x1000"
    token = md5_constructor("%s+%s" % (api_secret, url)).hexdigest()
    screenshot_url = "http://api.url2png.com/v3/%s/%s/%s/%s" % (api_key, token, bounds, url)

    # Download the whole image before touching the target file so a failed
    # transfer never leaves a truncated PNG behind.  ``except Exception``
    # (rather than a bare except) lets KeyboardInterrupt/SystemExit through.
    try:
        response = urllib2.urlopen(screenshot_url, timeout=120)
        try:
            image_data = response.read()
        finally:
            response.close()
    except Exception:
        screenshot_logger.error(u"Failed to fetch screenshot for '%s': %s %s" % (url, sys.exc_info()[0], sys.exc_info()[1]))
        return

    # PNG data is binary: write it in binary mode and always close the file.
    with open(full_path, "wb") as f:
        f.write(image_data)

    update_item(item, screenshot=filename)
    screenshot_logger.info(u"Fetched screenshot for '%s'" % url)
def handle(self, **options): from materials.models import CommunityItem from materials.models.course import Course from materials.models.library import Library from materials.tasks import check_url_status, update_screenshot from utils import update_item models = [Course, Library, CommunityItem] now = datetime.datetime.now() items = [] # Get items that were never fetched for model in models: qs = model.objects.filter(url_fetched_on=None) if qs.exists(): items += list(qs) qs.update(url_fetched_on=now) if not items: # Get the item with the oldest fetch date item = None for model in models: qs = model.objects.exclude(url_fetched_on=None).order_by("url_fetched_on") if not qs.exists(): continue candidate = qs[0] if item is None or candidate.url_fetched_on < item.url_fetched_on: item = candidate if item is not None: update_item(item, url_fetched_on=now) items.append(item) if not items: print "No items to fetch." return for item in items: print "Fetching URL for ", unicode(item) check_url_status(item) update_screenshot(item) print "Done!"
def update_item_request(item_id):
    '''
    The endpoint to handle a request to update an item.

    If ``assert_user_is_authenticated`` returns a truthy value, that value
    is returned as the response and nothing is updated.  Otherwise the item
    is updated from the submitted form fields ``name``, ``description``,
    ``image_url`` and ``category_ids`` (``None`` is passed for
    ``category_ids`` when the field is absent from the form), a confirmation
    message is flashed and the client is redirected to the item page.

    :param item_id: identifier of the item to update.
    '''
    res = assert_user_is_authenticated()
    if res:
        return res

    form = request.form
    category_ids = form.getlist('category_ids') if 'category_ids' in form else None

    db.connect()
    try:
        item = update_item(db.session,
                           item_id=item_id,
                           name=form.get('name'),
                           description=form.get('description'),
                           image_url=form.get('image_url'),
                           category_ids=category_ids)
    finally:
        # Always release the connection, even when update_item raises.
        db.disconnect()

    flash('Item "{}" edited!'.format(item.name))

    # Redirect to item page
    return redirect(url_for('item_page', item_id=item.id))
def update_screenshot(item):
    """Render a screenshot of ``item.url`` with the configured external tool.

    Returns without doing anything when the URL is empty.  When
    ``item.http_status`` is not 200, any existing screenshot is removed and
    the function returns.  Otherwise the old screenshot is removed, the
    command built from ``settings.WEBKIT2PNG_EXECUTABLE`` is run as a
    subprocess, and the item's ``screenshot`` field is set to the new file
    if the command produced one (or to ``None`` if it did not).

    :param item: object exposing ``url``, ``http_status``, ``screenshot``,
        ``id`` and ``_meta.object_name``.
    :returns: ``None`` in every case.
    """
    # Tolerate a missing URL, and encode to a byte string before quoting:
    # Python 2's urllib.quote raises KeyError on unicode input that contains
    # non-ASCII characters.
    url = (item.url or "").strip()
    url = urllib.quote(smart_str(url), safe="%/:=&?~#+!$,;'@()*[]")
    if not url:
        return

    if item.http_status != 200:
        if item.screenshot:
            try:
                delete(item.screenshot)
            except OSError:
                # Same best-effort policy as the removal further down: the
                # file may already be gone from disk.
                pass
            update_item(item, screenshot=None)
        return

    url_hash = md5_constructor(smart_str(url)).hexdigest()
    filename = "%s-%i-%s.png" % (item._meta.object_name.lower(), item.id, url_hash)
    filename = os.path.join(item.screenshot.field.get_directory_name(), filename)

    # Remove existing screenshot
    if item.screenshot:
        try:
            delete(item.screenshot)
        except OSError:
            pass
        update_item(item, screenshot=None)

    full_path = os.path.join(settings.MEDIA_ROOT, filename)
    dirname = os.path.dirname(full_path)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    width = 1024
    height = 768

    # The setting is a %-style template taking filename, url, width, height.
    executable = settings.WEBKIT2PNG_EXECUTABLE % dict(
        filename=full_path,
        url='"%s"' % url.replace('"', '\\"'),
        width=width,
        height=height
    )
    if isinstance(executable, unicode):
        executable = executable.encode(sys.getfilesystemencoding())
    args = shlex.split(executable)

    # NOTE(review): ``timeout`` is defined elsewhere; presumably it raises
    # TimeoutError inside the wrapped call after two minutes -- confirm.
    @timeout(timeout_duration=60 * 2)
    def fetch_screenshot(args):
        p = subprocess.Popen(args)
        try:
            p.wait()
            return 1
        except TimeoutError:
            p.kill()
            raise
        finally:
            # Only signal a child that is still running: once it has been
            # reaped, Python 2's Popen.terminate() would send SIGTERM to a
            # PID that may already belong to another process.
            if p.poll() is None:
                try:
                    p.terminate()
                except OSError:
                    pass

    result = fetch_screenshot(args)
    if result:
        if os.path.exists(full_path):
            update_item(item, screenshot=filename)
        else:
            update_item(item, screenshot=None)
def check_url_status(item):
    """Bring ``item.http_status`` in line with the live status of its URL.

    The current status code of ``item.url`` comes from
    ``get_url_status_code``.  When it differs from the stored value, the new
    code is saved through ``update_item`` and the item is passed to
    ``reindex``.
    """
    latest_status = get_url_status_code(item.url)
    status_changed = item.http_status != latest_status
    if status_changed:
        update_item(item, http_status=latest_status)
        reindex(item)