def get(guild, realm, fields=[]):
    """Fetch a guild record from the remote API and return the decoded JSON.

    ``fields`` is passed through ``_wrap_arr`` and then reduced to the
    entries that appear in ``VALID_FIELDS``; the survivors are sent as a
    comma-separated list.  ``guild`` and ``realm`` are percent-encoded
    before being substituted into ``GUILD_API``.

    Errors raised by ``urllib2.urlopen`` or ``json.load`` propagate to the
    caller.
    """
    # The list default is kept so the signature stays untouched; it is safe
    # here because ``fields`` is rebound immediately and never mutated.
    fields = _wrap_arr(fields)
    fields = [field for field in fields if field in VALID_FIELDS]
    guild = urllib2.quote(guild)
    realm = urllib2.quote(realm)
    request_url = API_ROOT + GUILD_API % (realm, guild, ",".join(fields))
    response = urllib2.urlopen(request_url)
    try:
        data = json.load(response)
    finally:
        # Release the connection even when the payload is not valid JSON.
        response.close()
    return data
def _normalize_url(self): """ Normalize the request url """ self.url = urllib2.quote(self.url.encode('utf-8'), safe="%/:=&?~#+!$,;'@()*[]")
def get_wiki_content(title):
    """Download the article named *title* and return its parsed content.

    *title* is expected to be a unicode string with spaces (no underscores,
    no URL escapes).  Whitespace runs become single underscores, the result
    is UTF-8 encoded and percent-quoted, and the article URL is obtained
    from ``wikiapi.WikiApi.get_article_url``.  The page is fetched with a
    browser-like User-agent and the downloaded body is handed to
    ``WikiApi.get_article``; the ``content`` attribute of what that returns
    is the result.

    Network errors from ``urllib2`` propagate to the caller.
    """
    wiki = wikiapi.WikiApi()
    underscored = '_'.join(title.split())
    url_title = urllib2.quote(underscored.encode('utf8'))  # url escape
    article_url = wiki.get_article_url(url_title)

    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    response = opener.open(article_url)
    try:
        content = response.read()
    finally:
        # Always release the connection, even if reading fails midway.
        response.close()

    art = wiki.get_article(content)
    return art.content
def get_wiki_content(title):
    """Download the article named *title* and return its parsed content.

    *title* is expected to be a unicode string with spaces (no underscores,
    no URL escapes).  Whitespace runs become single underscores, the result
    is UTF-8 encoded and percent-quoted, and the article URL is obtained
    from ``wikiapi.WikiApi.get_article_url``.  The page is fetched with a
    browser-like User-agent and the downloaded body is handed to
    ``WikiApi.get_article``; the ``content`` attribute of what that returns
    is the result.

    Network errors from ``urllib2`` propagate to the caller.
    """
    wiki = wikiapi.WikiApi()
    underscored = '_'.join(title.split())
    url_title = urllib2.quote(underscored.encode('utf8'))  # url escape
    article_url = wiki.get_article_url(url_title)

    opener = urllib2.build_opener()
    opener.addheaders = [('User-agent', 'Mozilla/5.0')]
    response = opener.open(article_url)
    try:
        content = response.read()
    finally:
        # Always release the connection, even if reading fails midway.
        response.close()

    art = wiki.get_article(content)
    return art.content
def handle404(self, reqorig, url, container, obj):
    """
    Ask the thumb host for a file that Swift does not have.

    The request is re-pointed at ``self.thumbhost``, its path is
    percent-encoded, and the resulting URL is opened with the caller's
    User-Agent (falling back to ``self.user_agent``) plus any
    X-Forwarded-For / X-Original-URI headers the caller supplied.

    When the thumb host answers with an HTTP error, a
    ``webob.exc.HTTPNotFound`` is returned; for anything other than a 404
    its status is overwritten with the upstream status code.

    ``url``, ``container`` and ``obj`` are not used by the code visible
    here.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    opener = urllib2.build_opener()

    # Pass on certain headers from the caller squid to the scalers
    user_agent = reqorig.headers.get('User-Agent')
    if user_agent is None:
        user_agent = self.user_agent
    opener.addheaders = [('User-Agent', user_agent)]
    for header_to_pass in ['X-Forwarded-For', 'X-Original-URI']:
        value = reqorig.headers.get(header_to_pass)
        if value is not None:
            opener.addheaders.append((header_to_pass, value))

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)
        # ok, call the encoded url
        upcopy = opener.open(encodedurl)
    except urllib2.HTTPError as status:
        if status.code == 404:
            resp = webob.exc.HTTPNotFound('Expected original file not found')
            return resp
        else:
            resp = webob.exc.HTTPNotFound('Unexpected error %s' % status)
            resp.status = status.code
            return resp
    # NOTE(review): as visible here nothing is returned on success and
    # ``upcopy`` is left unused -- confirm whether the remainder of this
    # handler lives elsewhere.
def _normalize_url(self): """ Normalize the request url """ self.url = urllib2.quote(self.url.encode('utf-8'), safe="%/:=&?~#+!$,;'@()*[]")
class WMFRewrite(object):
    """
    Rewrite Media Store URLs so that swift knows how to deal.

    Mostly it's a question of inserting the AUTH_ string, and changing / to -
    in the container section.
    """

    def __init__(self, app, conf):
        """Store the wrapped ``app`` and the stripped settings from ``conf``.

        ``conf`` must provide 'account', 'url', 'login', 'key', 'thumbhost',
        'user_agent', 'bind_port' and 'shard_containers'.  The mere presence
        of a 'writethumb' key enables writing fetched thumbs back.  When
        'shard_containers' is 'some', 'shard_container_list' is also read as
        a comma-separated list.
        """
        self.app = app
        self.account = conf['account'].strip()
        self.authurl = conf['url'].strip()
        self.login = conf['login'].strip()
        self.key = conf['key'].strip()
        self.thumbhost = conf['thumbhost'].strip()
        self.writethumb = 'writethumb' in conf
        self.user_agent = conf['user_agent'].strip()
        self.bind_port = conf['bind_port'].strip()
        self.shard_containers = conf['shard_containers'].strip()  # all, some, none
        if self.shard_containers == 'some':
            # if we're supposed to shard some containers, get a cleaned
            # list of the containers to shard
            self.shard_container_list = [
                name.strip()
                for name in conf['shard_container_list'].split(',')]

    def handle404(self, reqorig, url, container, obj):
        """
        Return a webob.Response which fetches the thumbnail from the thumb
        host, potentially writes it out to Swift so we don't 404 next time,
        and returns it. Note also that the thumb host might write it out
        to Swift so we don't have to.

        When the thumb host answers with an HTTP error a
        ``webob.exc.HTTPNotFound`` is returned instead.
        """
        # go to the thumb media store for unknown files
        reqorig.host = self.thumbhost
        # upload doesn't like our User-agent, otherwise we could call it
        # using urllib2.url()
        opener = urllib2.build_opener()
        opener.addheaders = [('User-agent', self.user_agent)]
        # At least in theory, we shouldn't be handing out links to originals
        # that we don't have (or in the case of thumbs, can't generate).
        # However, someone may have a formerly valid link to a file, so we
        # should do them the favor of giving them a 404.
        try:
            upcopy = opener.open(reqorig.url)
        except urllib2.HTTPError as status:
            # Compare the numeric status code: the exception object itself
            # never equals 404, which made this branch unreachable before.
            if status.code == 404:
                return webob.exc.HTTPNotFound(
                    'Expected original file not found')
            return webob.exc.HTTPNotFound('Unexpected error %s' % status)

        # get the Content-Type.
        uinfo = upcopy.info()
        c_t = uinfo.gettype()
        # sometimes Last-Modified isn't present; use now() when that happens.
        try:
            last_modified = time.mktime(uinfo.getdate('Last-Modified'))
        except TypeError:
            last_modified = time.mktime(time.localtime())

        if self.writethumb:
            # Fetch from upload, write into the cluster, and return it
            upcopy = Copy2(upcopy, self.app, url, urllib2.quote(container),
                           obj, self.authurl, self.login, self.key,
                           content_type=c_t, modified=last_modified)

        resp = webob.Response(app_iter=upcopy, content_type=c_t)
        # Only forward Last-Modified when upstream sent one; getheader()
        # yields None for a missing header and None is not a header value.
        upstream_last_modified = uinfo.getheader('Last-Modified')
        if upstream_last_modified is not None:
            resp.headers.add('Last-Modified', upstream_last_modified)
        return resp
def quote(card_name):
    """ Return the card name percent-encoded via ``urllib2.quote``
    """
    escaped_name = urllib2.quote(card_name)
    return escaped_name
def handle404(self, reqorig, url, container, obj):
    """
    Return a webob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    Two URLs are prepared: one for the image scalers (optionally mangled
    into the per-site ``thumb_handler.php`` form) and one for Thumbor.
    The request that is actually sent goes to Thumbor.  An upstream HTTP
    error is mirrored back as a webob HTTPError carrying the same code,
    message, headers and body.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)

    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    user_agent = reqorig.headers.get('User-Agent')
    if user_agent is not None:
        opener.addheaders.append(('User-Agent', user_agent))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in ['X-Forwarded-For', 'X-Forwarded-Proto',
                           'Accept', 'Accept-Encoding', 'X-Original-URI']:
        value = reqorig.headers.get(header_to_pass)
        if value is not None:
            opener.addheaders.append((header_to_pass, value))

    # NOTE: both openers share the very same list object, so the
    # X-Original-URI appended in the sitelang branch below is also sent
    # by thumbor_opener.
    thumbor_opener.addheaders = opener.addheaders
    self.logger.debug("Addheaders: %r" % thumbor_opener.addheaders)

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)

        # Thumbor never needs URL mangling and it needs a different host
        if self.thumborhost:
            thumbor_reqorig = reqorig.copy()
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wikimedia.org/wikipedia/commons/thumb/a/a2/Little_kitten_.jpg/330px-Little_kitten_.jpg
        # changes to
        # http://commons.wikipedia.org/w/thumb_handler.php/a/a2/Little_kitten_.jpg/330px-Little_kitten_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported
                # from thumb_handler.php
                if proj == 'wikipedia':
                    if lang in ['meta', 'commons', 'internal', 'grants']:
                        proj = 'wikimedia'
                    if lang in ['mediawiki']:
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.org' % (lang, proj)
                if proj == 'wikipedia' and lang == 'sources':
                    # yay special case
                    hostname = 'wikisource.org'
                # ok, replace the URL with just the part starting with
                # thumb/ : take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the
                # hostname)
                opener.addheaders.append(
                    ('X-Original-URI',
                     list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn("non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                resp = webob.exc.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn("sitelang match has proj %s lang %s encodedurl %s" % (proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)

        # NOTE(review): thumbor_encodedurl only exists when
        # self.thumborhost is truthy; with it unset this line raises an
        # unbound-variable error -- confirm thumborhost is always
        # configured.
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # copy the urllib2 HTTPError into a webob HTTPError class as-is
        class CopiedHTTPError(webob.exc.HTTPError):
            code = error.code
            title = error.msg

            def html_body(self, environ):
                return self.detail

            def __init__(self):
                super(CopiedHTTPError, self).__init__(
                    detail="".join(error.readlines()),
                    headers=error.hdrs.items())

        resp = CopiedHTTPError()
        return resp
def save_svg_and_png(self, kwargs):
    """ Save png out of the svg version of the chart

    ``kwargs`` is updated with the request form.  It supplies 'filename'
    (default 'img') and 'svg' (the SVG markup).  The SVG is stored as a
    File and the rendered PNG as an Image inside ``self.context``, and a
    QR-code URL for the chart is put on the request form as 'qr_url'.

    Returns a translated status message: "Success", or an error message
    when the context is not folderish or the PNG export fails.
    """
    if not IFolderish.providedBy(self.context):
        return _("Can't save png chart on a non-folderish object !")
    form = getattr(self.request, 'form', {})
    kwargs.update(form)
    filename = kwargs.get('filename', 'img')
    chart_url = self.context.absolute_url() + "#" + "tab-" + filename
    svg_filename = filename + ".svg"
    filename += ".png"
    sp = self.siteProperties
    qr_size = sp.get('googlechart.qrcode_size', '70')
    object_ids = self.context.objectIds()
    if qr_size == '0':
        qr_size = '70'
    qr_url = (
        u"http://chart.apis.google.com"
        "/chart?cht=qr&chld=H|0&chs=%sx%s&chl=%s" % (
            qr_size, qr_size, urllib2.quote(chart_url)))
    self.request.form['qr_url'] = qr_url

    svg_data = kwargs.get('svg', '')
    if not svg_data:
        return _("Success")

    new_svg = False
    if svg_filename not in object_ids:
        new_svg = True
        svg_filename = self.context.invokeFactory('File', id=svg_filename)
    svg_obj = self.context._getOb(svg_filename)
    svg_file_field = svg_obj.getField('file')
    svg_field_data = svg_file_field.getRaw(svg_obj).getIterator().read()
    if svg_field_data and svg_data == svg_field_data:
        return _("Success")
    elif svg_field_data:
        # 21894 svg_data from the form and the data saved within the current
        # svg files sometimes has the clipPath id number changed, otherwise
        # the files are identical in which case we no longer need to perform
        # any svg and image generation
        pattern = re.compile(r'_ABSTRACT_RENDERER_ID_\d+')
        # The incoming svg may not contain the renderer id at all; calling
        # .group() on a failed search would raise AttributeError.
        pattern_match = pattern.search(svg_data)
        if pattern_match:
            svg_data_match = pattern_match.group()
            svg_field_data_matched = pattern.sub(svg_data_match,
                                                 svg_field_data)
            if svg_data == svg_field_data_matched:
                return _("Success")

    # create image from the current svg
    img = super(SavePNGChart, self).__call__()
    if not img:
        return _("ERROR: An error occured while exporting your image. "
                 "Please try again later.")

    new_file = False
    if filename not in object_ids:
        new_file = True
        filename = self.context.invokeFactory('Image', id=filename)
    img_obj = self.context._getOb(filename)
    if new_file:
        img_obj.setExcludeFromNav(True)
    image_field = img_obj.getField('image')
    image_field.getMutator(img_obj)(img)

    if new_svg:
        svg_obj.setExcludeFromNav(True)
    svg_file_field.getMutator(svg_obj)(svg_data)

    wftool = getToolByName(svg_obj, "portal_workflow")
    state = wftool.getInfoFor(svg_obj, 'review_state', None)
    if state:
        if state != 'visible':
            workflows = wftool.getWorkflowsFor(svg_obj)
            workflow = workflows[0]
            transitions = workflow.transitions
            available_transitions = [
                transitions[i['id']]
                for i in wftool.getTransitionsFor(svg_obj)]
            to_do = [k for k in available_transitions
                     if k.new_state_id == 'published']

            self.request.form['_no_emails_'] = True
            # only the first matching transition is fired
            for item in to_do:
                workflow.doActionFor(svg_obj, item.id)
                break

            # then make it public draft
            available_transitions = [
                transitions[i['id']]
                for i in wftool.getTransitionsFor(svg_obj)]
            to_do = [k for k in available_transitions
                     if k.new_state_id == 'visible']
            for item in to_do:
                workflow.doActionFor(svg_obj, item.id)
                break
            svg_obj.reindexObject()

    if not new_svg:
        notify(InvalidateCacheEvent(svg_obj))
    return _("Success")
def title_to_article_url(title):
    """Build the article URL for *title* via the module-level ``wiki``.

    Whitespace runs in the title collapse to single underscores; the result
    is UTF-8 encoded and percent-quoted before being handed to
    ``wiki.get_article_url``.
    """
    underscored = '_'.join(title.split())
    quoted_title = urllib2.quote(underscored.encode('utf8'))  # url escape
    return wiki.get_article_url(quoted_title)
def title_to_article_url(title):
    """Return ``wiki.get_article_url`` for the URL-escaped form of *title*.

    The title's words are re-joined with underscores, encoded as UTF-8 and
    percent-quoted first.
    """
    words = title.split()
    raw_title = '_'.join(words).encode('utf8')
    escaped_title = urllib2.quote(raw_title)  # url escape
    url = wiki.get_article_url(escaped_title)
    return url
def get_card(name, redaction):
    """Look a card up on magiccards and build a card object from the page.

    Falls back, in order, to the site's name hint, to the English version
    of the page, and to the page of the requested redaction.

    :return: models.Card object, or None when no hint or no matching
             redaction page can be found
    """
    scraper = MagiccardsScraper

    def load(url):
        # download a page and parse it
        return BeautifulSoup(openurl(url))

    page_url = scraper.MAGICCARDS_BASE_URL + \
        scraper.MAGICCARDS_QUERY_TMPL % urllib2.quote(name)
    soup = load(page_url)

    # if card was not found by name, try to use magiccards hints
    if not scraper._is_card_page(soup):
        hint = scraper._try_get_hint(name, soup)
        if hint is None:
            return None
        name = hint.text
        page_url = ext.url_join(ext.get_domain(page_url), hint['href'])
        soup = load(page_url)

    # if card is found, but it's not english
    if not scraper._is_en(soup):
        language_cell = soup.find_all('table')[3].find_all('td')[2]
        english_flag = language_cell.find('img', alt='English')
        en_link_tag = list(english_flag.next_elements)[1]
        name = en_link_tag.text
        page_url = ext.url_join(ext.get_domain(page_url),
                                en_link_tag['href'])
        soup = load(page_url)

    # if card redaction is wrong, try to get correct
    if not scraper._reda_is(redaction, soup):
        page_url = scraper._get_correct_reda(redaction, soup)
        if page_url is None:
            return None
        soup = load(page_url)

    card_type = scraper._get_card_type(soup)
    info = scraper._get_card_info(soup)
    price = scraper._get_prices(soup)

    card_info = models.CardInfo(**info)
    card_prices = models.CardPrices(**price)
    return models.Card(ext.uni(name), ext.uni(redaction), card_type,
                       card_info, card_prices)
def save_svg_and_png(self, kwargs):
    """ Store the chart's svg as a File and its png rendering as an Image

    ``kwargs`` is merged with the request form; 'filename' (default 'img')
    and 'svg' are read from it.  A QR-code URL for the chart is placed on
    the request form as 'qr_url'.  Returns a translated status message.
    """
    if not IFolderish.providedBy(self.context):
        return _("Can't save png chart on a non-folderish object !")

    kwargs.update(getattr(self.request, 'form', {}))
    base_name = kwargs.get('filename', 'img')
    chart_url = self.context.absolute_url() + "#" + "tab-" + base_name
    svg_filename = base_name + ".svg"
    filename = base_name + ".png"

    qr_size = self.siteProperties.get('googlechart.qrcode_size', '70')
    object_ids = self.context.objectIds()
    if qr_size == '0':
        qr_size = '70'
    qr_template = (u"https://chart.apis.google.com"
                   "/chart?cht=qr&chld=H%sC0&chs=%sx%s&chl=%s")
    self.request.form['qr_url'] = qr_template % (
        "%7", qr_size, qr_size, urllib2.quote(chart_url))

    svg_data = kwargs.get('svg', '')
    if not svg_data:
        return _("Success")

    new_svg = svg_filename not in object_ids
    if new_svg:
        svg_filename = self.context.invokeFactory('File', id=svg_filename)
    svg_obj = self.context._getOb(svg_filename)
    svg_file_field = svg_obj.getField('file')
    svg_field_data = svg_file_field.getRaw(svg_obj).getIterator().read()

    if svg_field_data:
        if svg_data == svg_field_data:
            return _("Success")
        # 21894 the form's svg and the stored svg sometimes differ only in
        # the clipPath id number; in that case no svg or image generation
        # is needed
        renderer_id = re.compile(r'_ABSTRACT_RENDERER_ID_\d+')
        # 79908 only compare when the form's svg carries such an id
        found = renderer_id.search(svg_data)
        if found:
            stored_with_new_id = renderer_id.sub(found.group(),
                                                 svg_field_data)
            if svg_data == stored_with_new_id:
                return _("Success")

    # create image from the current svg
    img = super(SavePNGChart, self).__call__()
    # 79908 the export may hand back an ERROR message string instead of
    # image data, so require PNG to appear in the result
    if not (img and 'PNG' in img):
        return _("ERROR: An error occured while exporting your image. "
                 "Please try again later.")

    new_file = filename not in object_ids
    if new_file:
        filename = self.context.invokeFactory('Image', id=filename)
    img_obj = self.context._getOb(filename)
    if new_file:
        img_obj.setExcludeFromNav(True)
    img_obj.getField('image').getMutator(img_obj)(img)

    if new_svg:
        svg_obj.setExcludeFromNav(True)
    svg_file_field.getMutator(svg_obj)(svg_data)

    wftool = getToolByName(svg_obj, "portal_workflow")
    state = wftool.getInfoFor(svg_obj, 'review_state', None)
    if state and state != 'visible':
        workflow = wftool.getWorkflowsFor(svg_obj)[0]
        transitions = workflow.transitions

        def fire_first(target_state, silence_emails=False):
            # run the first available transition leading to target_state
            for available in wftool.getTransitionsFor(svg_obj):
                tid = available.get('id')
                definition = transitions.get(tid)
                if not definition:
                    continue
                if definition.new_state_id != target_state:
                    continue
                if silence_emails:
                    self.request.form['_no_emails_'] = True
                workflow.doActionFor(svg_obj, tid)
                return

        # publish
        fire_first('published', silence_emails=True)
        # then make it public draft
        fire_first('visible')
        svg_obj.reindexObject()

    if not new_svg:
        notify(InvalidateCacheEvent(svg_obj))
    return _("Success")
def handle404(self, reqorig, url, container, obj):
    """
    Return a swob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    ``reqorig`` is re-pointed at ``self.thumborhost``.  Upstream HTTP
    errors come back as a ``swob.HTTPException`` with the same status;
    connection-level failures come back as ``swob.HTTPServiceUnavailable``.
    ``url``, ``container`` and ``obj`` are not used here.
    """
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    thumbor_opener = urllib2.build_opener(DumbRedirectHandler())

    # Pass on certain headers from Varnish to Thumbor
    thumbor_opener.addheaders = []
    user_agent = reqorig.headers.get('User-Agent')
    if user_agent is not None:
        thumbor_opener.addheaders.append(('User-Agent', user_agent))
    else:
        thumbor_opener.addheaders.append(('User-Agent', self.user_agent))

    for header_to_pass in [
            'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
            'Accept-Encoding', 'X-Original-URI'
    ]:
        value = reqorig.headers.get(header_to_pass)
        if value is not None:
            thumbor_opener.addheaders.append((header_to_pass, value))

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        reqorig.host = self.thumborhost
        thumbor_urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the path but leave % and / alone
        thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
        thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # Wrap the urllib2 HTTPError into a swob HTTPException
        status = error.code
        if status not in swob.RESPONSE_REASONS:
            # Generic status description in case of unknown status reasons.
            status = "%s Error" % status
        return swob.HTTPException(status=status, body=error.msg,
                                  headers=error.hdrs.items())
    except urllib2.URLError as error:
        msg = ('There was a problem while contacting the thumbnailing service: %s'
               % error.reason)
        return swob.HTTPServiceUnavailable(msg)

    # get the Content-Type.
    uinfo = upcopy.info()
    c_t = uinfo.gettype()

    resp = swob.Response(app_iter=upcopy, content_type=c_t)

    headers_whitelist = [
        'Content-Length', 'Content-Disposition', 'Last-Modified',
        'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
        'Nginx-Request-Date', 'Nginx-Response-Date',
        'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
        'Thumbor-Request-Id', 'Thumbor-Request-Date'
    ]

    # add in the headers if we've got them
    for header in headers_whitelist:
        value = uinfo.getheader(header)
        # getheader() yields None for a header upstream did not send, so
        # comparing against '' alone let missing headers through as None.
        if value is not None and value != '':
            resp.headers[header] = value

    # also add CORS; see also our CORS middleware
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp
def handle404(self, reqorig, url, container, obj):
    """
    Return a webob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    Two URLs are prepared: one for the image scalers (optionally mangled
    into the per-site ``thumb_handler.php`` form) and one for Thumbor.
    The request that is actually sent goes to Thumbor.  An upstream HTTP
    error is mirrored back as a webob HTTPError carrying the same code,
    message, headers and body.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)

    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    user_agent = reqorig.headers.get('User-Agent')
    if user_agent is not None:
        opener.addheaders.append(('User-Agent', user_agent))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in ['X-Forwarded-For', 'X-Forwarded-Proto',
                           'Accept', 'Accept-Encoding', 'X-Original-URI']:
        value = reqorig.headers.get(header_to_pass)
        if value is not None:
            opener.addheaders.append((header_to_pass, value))

    # NOTE: both openers share the very same list object, so the
    # X-Original-URI appended in the sitelang branch below is also sent
    # by thumbor_opener.
    thumbor_opener.addheaders = opener.addheaders

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)

        # Thumbor never needs URL mangling and it needs a different host
        if self.thumborhost:
            thumbor_reqorig = reqorig.copy()
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported
                # from thumb_handler.php
                if proj == 'wikipedia':
                    if lang in ['meta', 'commons', 'internal', 'grants']:
                        proj = 'wikimedia'
                    if lang in ['mediawiki']:
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.%s' % (lang, proj, self.tld)
                if proj == 'wikipedia' and lang == 'sources':
                    # yay special case
                    hostname = 'wikisource.%s' % self.tld
                # ok, replace the URL with just the part starting with
                # thumb/ : take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the
                # hostname)
                opener.addheaders.append(
                    ('X-Original-URI',
                     list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn("non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                resp = webob.exc.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn(
                    "sitelang match has proj %s lang %s encodedurl %s" % (
                        proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)

        # To turn thumbor off and have thumbnail traffic served by image
        # scalers, replace the line below with this one:
        # upcopy = opener.open(encodedurl)
        # NOTE(review): thumbor_encodedurl only exists when
        # self.thumborhost is truthy; with it unset this line raises an
        # unbound-variable error -- confirm thumborhost is always
        # configured.
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # copy the urllib2 HTTPError into a webob HTTPError class as-is
        class CopiedHTTPError(webob.exc.HTTPError):
            code = error.code
            title = error.msg

            def html_body(self, environ):
                return self.detail

            def __init__(self):
                super(CopiedHTTPError, self).__init__(
                    detail="".join(error.readlines()),
                    headers=error.hdrs.items())

        return CopiedHTTPError()
def thumborify_url(self, reqorig, host):
    """Point *reqorig* at *host* and return its URL with the path quoted.

    ``reqorig.host`` is overwritten.  Only the path component of the URL is
    percent-encoded; ``%`` and ``/`` are left as they are.
    """
    reqorig.host = host
    parts = list(urlparse.urlsplit(reqorig.url))
    path_index = 2
    parts[path_index] = urllib2.quote(parts[path_index], '%/')
    encoded = urlparse.urlunsplit(parts)
    return encoded
def handle404(self, reqorig, url, container, obj):
    """
    Return a swob.Response which fetches the thumbnail from the thumb
    host and returns it. Note also that the thumb host might write it out
    to Swift so it won't 404 next time.

    Two URLs are prepared: one for the image scalers (optionally mangled
    into the per-site ``thumb_handler.php`` form) and one for Thumbor.
    The request that is actually sent goes to Thumbor.  Upstream HTTP
    errors come back as a ``swob.HTTPException`` with the same status;
    connection-level failures come back as ``swob.HTTPServiceUnavailable``.
    """
    # go to the thumb media store for unknown files
    reqorig.host = self.thumbhost
    # upload doesn't like our User-agent, otherwise we could call it
    # using urllib2.url()
    proxy_handler = urllib2.ProxyHandler({'http': self.thumbhost})
    redirect_handler = DumbRedirectHandler()
    opener = urllib2.build_opener(redirect_handler, proxy_handler)
    # Thumbor doesn't need (and doesn't like) the proxy
    thumbor_opener = urllib2.build_opener(redirect_handler)

    # Pass on certain headers from the caller squid to the scalers
    opener.addheaders = []
    user_agent = reqorig.headers.get('User-Agent')
    if user_agent is not None:
        opener.addheaders.append(('User-Agent', user_agent))
    else:
        opener.addheaders.append(('User-Agent', self.user_agent))
    for header_to_pass in [
            'X-Forwarded-For', 'X-Forwarded-Proto', 'Accept',
            'Accept-Encoding', 'X-Original-URI'
    ]:
        value = reqorig.headers.get(header_to_pass)
        if value is not None:
            opener.addheaders.append((header_to_pass, value))

    # NOTE: both openers share the very same list object, so the
    # X-Original-URI appended in the sitelang branch below is also sent
    # by thumbor_opener.
    thumbor_opener.addheaders = opener.addheaders

    # At least in theory, we shouldn't be handing out links to originals
    # that we don't have (or in the case of thumbs, can't generate).
    # However, someone may have a formerly valid link to a file, so we
    # should do them the favor of giving them a 404.
    try:
        # break apart the url, url-encode it, and put it back together
        urlobj = list(urlparse.urlsplit(reqorig.url))
        # encode the URL but don't encode %s and /s
        urlobj[2] = urllib2.quote(urlobj[2], '%/')
        encodedurl = urlparse.urlunsplit(urlobj)

        # Thumbor never needs URL mangling and it needs a different host
        if self.thumborhost:
            thumbor_reqorig = swob.Request(reqorig.environ.copy())
            thumbor_reqorig.host = self.thumborhost
            thumbor_urlobj = list(urlparse.urlsplit(thumbor_reqorig.url))
            thumbor_urlobj[2] = urllib2.quote(thumbor_urlobj[2], '%/')
            thumbor_encodedurl = urlparse.urlunsplit(thumbor_urlobj)

        # if sitelang, we're supposed to mangle the URL so that
        # http://upload.wm.o/wikipedia/commons/thumb/a/a2/Foo_.jpg/330px-Foo_.jpg
        # changes to
        # http://commons.wp.o/w/thumb_handler.php/a/a2/Foo_.jpg/330px-Foo_.jpg
        if self.backend_url_format == 'sitelang':
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                # and here are all the legacy special cases, imported
                # from thumb_handler.php
                if proj == 'wikipedia':
                    if lang in ['meta', 'commons', 'internal', 'grants']:
                        proj = 'wikimedia'
                    if lang in ['mediawiki']:
                        lang = 'www'
                        proj = 'mediawiki'
                hostname = '%s.%s.%s' % (lang, proj, self.tld)
                if proj == 'wikipedia' and lang == 'sources':
                    # yay special case
                    hostname = 'wikisource.%s' % self.tld
                # ok, replace the URL with just the part starting with
                # thumb/ : take off the first two parts of the path
                # (eg /wikipedia/commons/); make sure the string starts
                # with a /
                encodedurl = 'http://%s/w/thumb_handler.php/%s' % (
                    hostname, match.group('path'))
                # add in the X-Original-URI with the swift got (minus the
                # hostname)
                opener.addheaders.append(
                    ('X-Original-URI',
                     list(urlparse.urlsplit(reqorig.url))[2]))
            else:
                # ASSERT this code should never be hit since only thumbs
                # should call the 404 handler
                self.logger.warn(
                    "non-thumb in 404 handler! encodedurl = %s" % encodedurl)
                resp = swob.HTTPNotFound('Unexpected error')
                return resp
        else:
            # log the result of the match here to test and make sure it's
            # sane before enabling the config
            match = re.match(
                r'^http://(?P<host>[^/]+)/(?P<proj>[^-/]+)/(?P<lang>[^/]+)/thumb/(?P<path>.+)',
                encodedurl)
            if match:
                proj = match.group('proj')
                lang = match.group('lang')
                self.logger.warn(
                    "sitelang match has proj %s lang %s encodedurl %s" %
                    (proj, lang, encodedurl))
            else:
                self.logger.warn("no sitelang match on encodedurl: %s" % encodedurl)

        # To turn thumbor off and have thumbnail traffic served by image
        # scalers, replace the line below with this one:
        # upcopy = opener.open(encodedurl)
        # NOTE(review): thumbor_encodedurl only exists when
        # self.thumborhost is truthy; with it unset this line raises an
        # unbound-variable error -- confirm thumborhost is always
        # configured.
        upcopy = thumbor_opener.open(thumbor_encodedurl)
    except urllib2.HTTPError as error:
        # Wrap the urllib2 HTTPError into a swob HTTPException
        status = error.code
        if status not in swob.RESPONSE_REASONS:
            # Generic status description in case of unknown status reasons.
            status = "%s Error" % status
        return swob.HTTPException(status=status, body=error.msg,
                                  headers=error.hdrs.items())
    except urllib2.URLError as error:
        msg = ('There was a problem while contacting the thumbnailing service: %s'
               % error.reason)
        return swob.HTTPServiceUnavailable(msg)

    # get the Content-Type.
    uinfo = upcopy.info()
    c_t = uinfo.gettype()

    resp = swob.Response(app_iter=upcopy, content_type=c_t)

    headers_whitelist = [
        'Content-Length', 'Content-Disposition', 'Last-Modified',
        'Accept-Ranges', 'XKey', 'Thumbor-Engine', 'Server',
        'Nginx-Request-Date', 'Nginx-Response-Date',
        'Thumbor-Processing-Time', 'Thumbor-Processing-Utime',
        'Thumbor-Request-Id', 'Thumbor-Request-Date'
    ]

    # add in the headers if we've got them
    for header in headers_whitelist:
        value = uinfo.getheader(header)
        # getheader() yields None for a header upstream did not send, so
        # comparing against '' alone let missing headers through as None.
        if value is not None and value != '':
            resp.headers[header] = value

    # also add CORS; see also our CORS middleware
    resp.headers['Access-Control-Allow-Origin'] = '*'

    return resp