def import_pages(): from pages.models import Page, slugify request = api.APIRequest(site, { 'action': 'query', 'list': 'allpages', 'aplimit': '50', }) print "Getting master page list (this may take a bit).." response_list = request.query(querycontinue=False)['query']['allpages'] pages = pagelist.listFromQuery(site, response_list) print "Got master page list." for mw_p in pages[:100]: print "Importing %s" % mw_p.title wikitext = mw_p.getWikiText() if mw_p.isRedir(): add_redirect(mw_p) continue html = render_wikitext(mw_p.title, wikitext) if Page.objects.filter(slug=slugify(mw_p.title)): # Page already exists with this slug. This is probably because # MediaWiki has case-sensitive pagenames. other_page = Page.objects.get(slug=slugify(mw_p.title)) if len(html) > other_page.content: # *This* page has more content. Let's use it instead. for other_page_version in other_page.versions.all(): other_page_version.delete() other_page.delete(track_changes=False) p = Page(name=mw_p.title, content=html) p.content = process_html(p.content, p.name) p.clean_fields() p.save()
def import_redirect(from_pagename): # We create the Redirects here. We don't try and port over the # version information for the formerly-page-text-based redirects. to_pagename = parse_redirect(from_pagename) if to_pagename is None: print "Error creating redirect: %s has no link" % from_pagename return to_pagename = fix_pagename(to_pagename) from pages.models import Page, slugify from redirects.models import Redirect u = get_robot_user() try: to_page = Page.objects.get(slug=slugify(to_pagename)) except Page.DoesNotExist: print "Error creating redirect: %s --> %s" % ( from_pagename.encode('utf-8'), to_pagename.encode('utf-8')) print " (page %s does not exist)" % to_pagename.encode('utf-8') return if slugify(from_pagename) == to_page.slug: return if not Redirect.objects.filter(source=slugify(from_pagename)): r = Redirect(source=slugify(from_pagename), destination=to_page) try: r.save(user=u, comment="Automated edit. Creating redirect.") except IntegrityError: connection.close() print "Redirect %s --> %s created" % (from_pagename.encode('utf-8'), to_pagename.encode('utf-8'))
def process_redirects(): # We create the Redirects here. We don't try and port over the # version information for the formerly-page-text-based redirects. global redirects from pages.models import Page, slugify from redirects.models import Redirect u = get_robot_user() for from_pagename, to_pagename in redirects: try: to_page = Page.objects.get(slug=slugify(to_pagename)) except Page.DoesNotExist: print "Error creating redirect: %s --> %s" % ( from_pagename, to_pagename) print " (page %s does not exist)" % to_pagename continue if slugify(from_pagename) == to_page.slug: continue if not Redirect.objects.filter(source=slugify(from_pagename)): r = Redirect(source=slugify(from_pagename), destination=to_page) r.save(user=u, comment="Automated edit. Creating redirect.") print "Redirect %s --> %s created" % (from_pagename, to_pagename)
def forwards(self, orm):
    """Add the slug columns and backfill them from the stored names."""
    from pages.models import slugify

    slug_field = self.gf('django.db.models.fields.CharField')(
        default='testingo', max_length=255, db_index=True)
    # Adding field 'Link.destination_slug'
    db.add_column(u'links_link', 'destination_slug', slug_field,
                  keep_default=False)
    # Adding field 'IncludedPage.included_page_slug'
    slug_field = self.gf('django.db.models.fields.CharField')(
        default='testingo', max_length=255, db_index=True)
    db.add_column(u'links_includedpage', 'included_page_slug', slug_field,
                  keep_default=False)

    link_qs = orm['links.Link'].objects.all().defer('destination', 'source')
    for link in link_qs.iterator():
        link.destination_slug = slugify(link.destination_name)
        link.save()

    included_qs = orm['links.IncludedPage'].objects.all().defer(
        'source', 'included_page')
    for included in included_qs.iterator():
        included.included_page_slug = slugify(included.included_page_name)
        included.save()
def record_page_links(page):
    """Sync Link rows for `page` with the internal links in its content."""
    region = page.region
    for pagename, count in extract_internal_links(page.content).iteritems():
        slug = slugify(pagename)
        qs = Link.objects.filter(source=page, region=region)
        matches = qs.filter(destination_slug=slug) | qs.filter(destination__slug=slug)
        if matches:
            link = matches[0]
            if link.count == count:
                # No new links with this name on this page, so skip updating.
                continue
            link.count = count
        else:
            candidates = Page.objects.filter(slug=slug, region=region)
            destination = candidates[0] if candidates else None
            # Exists for some reason already (probably running a script that's moving between regions?)
            if destination and Link.objects.filter(
                    source=page, destination=destination).exists():
                continue
            link = Link(
                source=page,
                region=region,
                destination=destination,
                destination_name=pagename,
                destination_slug=slug,
                count=count,
            )
        link.save()
def record_page_includes(page):
    """Sync IncludedPage rows for `page` with the pages its content includes."""
    region = page.region
    included = extract_included_pagenames(page.content)
    for pagename in included:
        slug = slugify(pagename)
        already_recorded = IncludedPage.objects.filter(
            source=page, region=region, included_page_slug=slug)
        if already_recorded:
            continue
        targets = Page.objects.filter(slug=slug, region=region)
        target = targets[0] if targets else None
        record = IncludedPage(
            source=page,
            region=region,
            included_page=target,
            included_page_name=pagename,
            included_page_slug=slug,
        )
        record.save()
    # Remove included pages they've removed from the page
    current_slugs = [slugify(pagename) for pagename in included]
    stale = IncludedPage.objects.filter(
        source=page, region=region).exclude(included_page_slug__in=current_slugs)
    for record in stale:
        record.delete()
def record_page_includes(page):
    """Keep IncludedPage records in step with the includes in `page.content`."""
    region = page.region
    pagenames = extract_included_pagenames(page.content)
    for pagename in pagenames:
        exists = IncludedPage.objects.filter(
            source=page, region=region,
            included_page_slug=slugify(pagename))
        if not exists:
            target_pages = Page.objects.filter(
                slug=slugify(pagename), region=region)
            if target_pages:
                included_page = target_pages[0]
            else:
                included_page = None
            IncludedPage(
                source=page,
                region=region,
                included_page=included_page,
                included_page_name=pagename,
                included_page_slug=slugify(pagename),
            ).save()
    # Remove included pages they've removed from the page
    slugs = [slugify(pagename) for pagename in pagenames]
    removed = IncludedPage.objects.filter(
        source=page, region=region).exclude(included_page_slug__in=slugs)
    for record in removed:
        record.delete()
def create_mw_template_as_page(template_name, template_html):
    """
    Create a page to hold the rendered template.

    Returns:
        String representing the pagename of the new include-able page.
    """
    from pages.models import Page, slugify

    robot = get_robot_user()
    # Keeping it simple for now. We can namespace later if people want that.
    include_name = template_name[len('Template:'):]

    if not Page.objects.filter(slug=slugify(include_name)):
        mw_page = page.Page(site, title=template_name)
        new_page = Page(name=include_name)
        new_page.content = process_html(
            template_html,
            pagename=template_name,
            mw_page_id=mw_page.pageid,
            attach_img_to_pagename=include_name,
            show_img_borders=False,
        )
        new_page.clean_fields()
        # check if it exists again, processing takes time
        if not Page.objects.filter(slug=slugify(include_name)):
            new_page.save(user=robot,
                          comment="Automated edit. Creating included page.")
    return include_name
def process_redirects(): # We create the Redirects here. We don't try and port over the # version information for the formerly-page-text-based redirects. global redirects from pages.models import Page, slugify from redirects.models import Redirect u = get_robot_user() for from_pagename, to_pagename in redirects: try: to_page = Page.objects.get(slug=slugify(to_pagename)) except Page.DoesNotExist: print "Error creating redirect: %s --> %s" % (from_pagename, to_pagename) print " (page %s does not exist)" % to_pagename continue if slugify(from_pagename) == to_page.slug: continue if not Redirect.objects.filter(source=slugify(from_pagename)): r = Redirect(source=slugify(from_pagename), destination=to_page) r.save(user=u, comment="Automated edit. Creating redirect.") print "Redirect %s --> %s created" % (from_pagename, to_pagename)
def _get_or_create_page(self):
    """Return the existing page for ?pagename=, or an unsaved stub for it."""
    pagename = self.request.GET.get('pagename')
    region = self.get_region()
    slug = slugify(pagename)
    existing = Page.objects.filter(slug=slug, region=region)
    if existing:
        return existing[0]
    # No page yet: build an unsaved placeholder with starter content.
    content = _('<p>What do you know about %s?</p>') % pagename
    return Page(slug=slug, name=pagename, content=content, region=region)
def handler404(self, request, *args, **kwargs):
    """404 handler: show the 'new map' template for a page without map data."""
    slug = slugify(kwargs.get("slug"))
    try:
        page = Page.objects.get(slug=slug)
    except Page.DoesNotExist:
        # Page doesn't exist either; use an unsaved stub.
        page = Page(slug=slug)
    context = {"page": page, "mapdata": MapData(page=page)}
    return HttpResponseNotFound(
        direct_to_template(request, "maps/mapdata_new.html", context)
    )
def handler404(self, request, *args, **kwargs):
    """404 handler: render the 'new map' page for a map-less page slug."""
    slug = slugify(kwargs.get('slug'))
    try:
        page = Page.objects.get(slug=slug)
    except Page.DoesNotExist:
        page = Page(slug=slug)  # unsaved stub
    mapdata = MapData(page=page)
    body = render(request, 'maps/mapdata_new.html',
                  {'page': page, 'mapdata': mapdata})
    return HttpResponseNotFound(body)
def import_page(mw_p): from pages.models import Page, slugify print "Importing %s" % mw_p.title.encode('utf-8') parsed = parse_page(mw_p.title) html = parsed['html'] name = fix_pagename(mw_p.title) if Page.objects.filter(slug=slugify(name)).exists(): print "Page %s already exists" % name.encode('utf-8') # Page already exists with this slug. This is probably because # MediaWiki has case-sensitive pagenames. other_page = Page.objects.get(slug=slugify(name)) if len(html) > other_page.content: print "Clearing out other page..", other_page.name.encode('utf-8') # *This* page has more content. Let's use it instead. for other_page_version in other_page.versions.all(): other_page_version.delete() other_page.delete(track_changes=False) else: # Other page has more content. return if mw_p.title.startswith('Category:'): # include list of tagged pages include_html = ( '<a href="tags/%(quoted_tag)s" ' 'class="plugin includetag includepage_showtitle">' 'List of pages tagged "%(tag)s"' '</a>' % { 'quoted_tag': urllib.quote(name), 'tag': name, } ) html += include_html p = Page(name=name, content=html) p.content = process_html(p.content, pagename=p.name, templates=parsed['templates'], mw_page_id=mw_p.pageid, historic=False) if not (p.content.strip()): p.content = '<p> </p>' # page content can't be blank p.clean_fields() try: p.save(track_changes=False) except IntegrityError: connection.close() try: create_page_revisions(p, mw_p, parsed) except KeyError: # For some reason the response lacks a revisions key # TODO: figure out why pass process_page_categories(p, parsed['categories'])
def handler404(self, request, *args, **kwargs):
    """404 handler: offer the 'create map data' view for the requested page."""
    page_slug = slugify(kwargs.get('slug'))
    try:
        page = Page.objects.get(slug=page_slug)
    except Page.DoesNotExist:
        # Fall back to an unsaved stub page for the template.
        page = Page(slug=page_slug)
    return HttpResponseNotFound(
        direct_to_template(request, 'maps/mapdata_new.html',
                           {'page': page, 'mapdata': MapData(page=page)})
    )
def forwards(self, orm):
    """Data migration: record an IncludedPage row for every page include
    found in existing page content.

    For each page, extracts the included pagenames from its HTML and
    creates (or re-links) IncludedPage records pointing at the included
    page, if it exists in the same region.
    """
    from pages.models import slugify
    from links import extract_included_pagenames
    for page in orm['pages.Page'].objects.all().iterator():
        region = page.region
        included_pages = extract_included_pagenames(page.content)
        print "..recording included pages on %s" % smart_str(page.name)
        for pagename in included_pages:
            # Resolve the included pagename to a real Page in this region,
            # if one exists.
            page_exists = orm['pages.Page'].objects.filter(
                slug=slugify(pagename), region=region)
            if page_exists:
                included_page = page_exists[0]
            else:
                included_page = None
            # Already recorded against this exact target (or None) — skip.
            if orm.IncludedPage.objects.filter(
                    source=page, included_page=included_page).exists():
                continue
            if orm.IncludedPage.objects.filter(
                    source=page, included_page_name__iexact=pagename).exists():
                # A record exists under a case-insensitive name match;
                # just point it at the resolved page (when there is one).
                if included_page:
                    included = orm.IncludedPage.objects.filter(
                        source=page, included_page_name__iexact=pagename)[0]
                    included.included_page = included_page
                    included.save()
            else:
                # First time we've seen this include on this page.
                included = orm.IncludedPage(
                    source=page,
                    region=region,
                    included_page=included_page,
                    included_page_name=pagename,
                )
                included.save()
def process_mapdata(): # We create the MapData models here. We can't create them until the # Page objects are created. global mapdata_objects_to_create from maps.models import MapData from pages.models import Page, slugify from django.contrib.gis.geos import Point, MultiPoint for item in mapdata_objects_to_create: print "Adding mapdata for", item['pagename'] p = Page.objects.get(slug=slugify(item['pagename'])) mapdata = MapData.objects.filter(page=p) y = float(item['lat']) x = float(item['lon']) point = Point(x, y) if mapdata: m = mapdata[0] points = m.points points.append(point) m.points = points else: points = MultiPoint(point) m = MapData(page=p, points=points) m.save()
def extract_included_tags(html):
    """
    Args:
        html: A string containing an HTML5 fragment.

    Returns:
        A list of the included tag slugs (lowercased).
    """
    from tags.models import slugify

    parser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("lxml"),
        namespaceHTMLElements=False)
    # Wrap to make the tree lookup easier
    tree = parser.parseFragment('<div>%s</div>' % html)[0]
    # Grab the link source if it's an included page
    slugs = []
    for anchor in tree.xpath('//a'):
        if not _is_included_tag(anchor):
            continue
        try:
            href = anchor.attrib.get('href')
            slug = slugify(url_to_name(href)[TAGS_PATH_LEN:].lower())
        except UnicodeDecodeError:
            # Undecodable href — skip this tag link.
            continue
        slugs.append(slug)
    return slugs
def extra_context(self):
    """Add create-page hints (existing page, slug, keywords) to context."""
    context = super(CreatePageSearchView, self).extra_context()
    query_slug = slugify(self.query)
    context['page_exists_for_query'] = Page.objects.filter(slug=query_slug)
    context['query_slug'] = Page(name=self.query).pretty_slug
    context['keywords'] = self.query.split()
    return context
def forwards(self, orm):
    """Data migration: create IncludedPage rows for every include found
    in existing page content (duplicate of the sibling migration, with
    denser formatting in the original)."""
    from pages.models import slugify
    from links import extract_included_pagenames
    for page in orm['pages.Page'].objects.all().iterator():
        region = page.region
        included_pages = extract_included_pagenames(page.content)
        print "..recording included pages on %s" % smart_str(page.name)
        for pagename in included_pages:
            # Resolve the include target within the page's region.
            page_exists = orm['pages.Page'].objects.filter(slug=slugify(pagename), region=region)
            if page_exists:
                included_page = page_exists[0]
            else:
                included_page = None
            # Skip if already recorded against this exact target (or None).
            if orm.IncludedPage.objects.filter(source=page, included_page=included_page).exists():
                continue
            if orm.IncludedPage.objects.filter(source=page, included_page_name__iexact=pagename).exists():
                # Case-insensitive name match exists: re-link it if we
                # resolved a real page.
                if included_page:
                    included = orm.IncludedPage.objects.filter(source=page, included_page_name__iexact=pagename)[0]
                    included.included_page = included_page
                    included.save()
            else:
                # First record of this include for this page.
                included = orm.IncludedPage(
                    source=page,
                    region=region,
                    included_page=included_page,
                    included_page_name=pagename,
                )
                included.save()
def get_object(self):
    """Return the Redirect for this slug, or an unsaved one if none exists."""
    source = slugify(self.kwargs.get('slug'))
    matches = Redirect.objects.filter(source=source, region=self.get_region())
    if matches:
        return matches[0]
    return Redirect(source=source, region=self.get_region())
def get_user_page(user, request):
    """
    Hacky heuristics for picking the underlying Page that holds the
    userpage content.

    TODO: Make this all belong the a single administrative region,
          'users', once we have a notifications framework in place.
    """
    from pages.models import Page, slugify
    pagename = "Users/%s" % user.username
    user_pages = Page.objects.filter(slug=slugify(pagename))
    if user_pages:
        # Just pick the first one
        return user_pages[0]
    else:
        # Check to see if they've edited a region recently
        edited_pages = Page.versions.filter(version_info__user=user)
        referer = request.META.get('HTTP_REFERER')
        if edited_pages.exists():
            region = edited_pages[0].region
            return Page(name=pagename, region=region)
        # Let's try and guess by the previous URL. Ugh!
        if referer:
            urlparts = urllib_parse.urlparse(referer)
            # Is this host us?
            for host in settings.ALLOWED_HOSTS:
                if urlparts.netloc.endswith(host):
                    # BUG FIX: was `parts.path` — `parts` is undefined
                    # (NameError at runtime); the parsed result is
                    # `urlparts`.
                    pathparts = urlparts.path.split('/')
                    # Is the path in a region?
                    if len(pathparts) > 1 and Region.objects.filter(slug=pathparts[1]).exists():
                        return Page(name=pagename, region=Region.objects.get(slug=pathparts[1]))
        # Put it in the main region for now :/
        return Page(name=pagename, region=get_main_region())
def get_object(self):
    """Return this page's MapData, or an unsaved one if none exists yet."""
    page = Page.objects.get(slug=slugify(self.kwargs.get("slug")))
    existing = MapData.objects.filter(page=page)
    return existing[0] if existing else MapData(page=page)
def extra_context(self):
    """Add create-page hints plus the map widget to the search context."""
    context = super(CreatePageSearchView, self).extra_context()
    context["page_exists_for_query"] = Page.objects.filter(
        slug=slugify(self.query))
    context["query_slug"] = Page(name=self.query).pretty_slug
    context["keywords"] = self.query.split()
    context["map"] = self.get_map()
    return context
def get_object(self):
    """Look up the MapData for the slug, falling back to an unsaved one."""
    slug = self.kwargs.get('slug')
    page = Page.objects.get(slug=slugify(slug))
    for mapdata in MapData.objects.filter(page=page):
        # Only the first match matters.
        return mapdata
    return MapData(page=page)
def process_response(self, request, response):
    """On a 404, follow a stored Redirect matching the request path."""
    if response.status_code != 404:
        # No need to check for a redirect for non-404 responses.
        return response
    if _is_redirect(request) or _force_show_page(request):
        # Don't double-redirect and allow the page to be
        # force-displayed.
        return response
    # Skip leading slash.
    slug = slugify(request.get_full_path()[1:])
    # Skip trailing slash.
    if slug.endswith('/'):
        slug = slug[:-1]
    try:
        match = Redirect.objects.get(source=slug)
    except Redirect.DoesNotExist:
        # No redirect was found. Return the response.
        return response
    return HttpResponseRedirect(match.destination.get_absolute_url() +
                                '?&redirected_from=%s' % slug)
def process_mapdata(): # We create the MapData models here. We can't create them until the # Page objects are created. global mapdata_objects_to_create from maps.models import MapData from pages.models import Page, slugify from django.contrib.gis.geos import Point, MultiPoint for item in mapdata_objects_to_create: page_name = item["pagename"].encode("utf-8") print "Adding mapdata for", page_name try: p = Page.objects.get(slug=slugify(item["pagename"])) except Page.DoesNotExist: print "*** Warning *** Skipping mapdata for page", page_name print (" Found mapdata for the page on wikimedia site, but " "the page does not exist in localwiki.") continue mapdata = MapData.objects.filter(page=p) y = float(item["lat"]) x = float(item["lon"]) point = Point(x, y) if mapdata: m = mapdata[0] points = m.points points.append(point) m.points = points else: points = MultiPoint(point) m = MapData(page=p, points=points) try: m.save() except IntegrityError: connection.close()
def forwards(self, orm):
    """Data migration: create Link rows for every internal link found in
    existing page content, resolving destinations within each page's
    region where possible."""
    from pages.models import slugify
    from links import extract_internal_links
    for page in orm['pages.Page'].objects.all().iterator():
        region = page.region
        links = extract_internal_links(page.content)
        print "..recording page links on %s" % smart_str(page.name)
        for pagename, count in links.iteritems():
            # Resolve the link target within this page's region.
            page_exists = orm['pages.Page'].objects.filter(slug=slugify(pagename), region=region)
            if page_exists:
                destination = page_exists[0]
            else:
                destination = None
            # Already recorded against this exact destination (or None).
            if orm.Link.objects.filter(source=page, destination=destination).exists():
                continue
            if orm.Link.objects.filter(source=page, destination_name__iexact=pagename).exists():
                # Case-insensitive name match exists: re-link it if we
                # resolved a real destination page.
                if destination:
                    link = orm.Link.objects.filter(source=page, destination_name__iexact=pagename)[0]
                    link.destination = destination
                    link.save()
            else:
                # First record of this link for this page.
                link = orm.Link(
                    source=page,
                    region=region,
                    destination=destination,
                    destination_name=pagename,
                    count=count,
                )
                link.save()
def render(self, context):
    """Render a wiki link as an <a> tag.

    Relative links to files get a file-info URL and a class describing
    the file type; relative links to pages get the page's pretty URL, or
    a "missing_link" class and a name-derived URL when the page does not
    exist.  Any unexpected error renders as an empty string.
    """
    try:
        cls = ''
        url = self.href
        page = context['page']
        if self.is_relative_link(url):
            if url.startswith('_files/'):
                # Link to an attached file on this page.
                filename = file_url_to_name(url)
                url = reverse('pages:file-info', args=[page.pretty_slug, filename])
                try:
                    file = PageFile.objects.get(slug__exact=page.slug, name__exact=filename)
                    cls = ' class="file_%s"' % file.rough_type
                except PageFile.DoesNotExist:
                    cls = ' class="missing_link"'
            else:
                try:
                    page = Page.objects.get(slug__exact=slugify(url))
                    url = reverse('pages:show', args=[page.pretty_slug])
                except Page.DoesNotExist:
                    cls = ' class="missing_link"'
                    # Convert to proper URL: My%20page -> My_page
                    url = name_to_url(url_to_name(url))
                    url = reverse('pages:show', args=[url])
        return '<a href="%s"%s>%s</a>' % (url, cls, self.nodelist.render(context))
    except:
        # NOTE(review): deliberately swallows all errors so a bad link
        # never breaks page rendering — confirm this is intended.
        return ''
def process_response(self, request, response):
    """Redirect middleware: on 404, consult the Redirect table for the path."""
    if response.status_code != 404:
        # No need to check for a redirect for non-404 responses.
        return response
    if _is_redirect(request) or _force_show_page(request):
        # Don't double-redirect and allow the page to be
        # force-displayed.
        return response
    # Skip leading slash.
    slug = slugify(request.get_full_path()[1:])
    # Skip trailing slash.
    if slug.endswith('/'):
        slug = slug[:-1]
    redirect = None
    try:
        redirect = Redirect.objects.get(source=slug)
    except Redirect.DoesNotExist:
        pass
    if redirect is None:
        # No redirect was found. Return the response.
        return response
    destination_url = redirect.destination.get_absolute_url()
    return HttpResponseRedirect(
        destination_url + '?&redirected_from=%s' % slug
    )
def get_object(self):
    """Build a MapData bound to the most recent version of a deleted page."""
    dummy = Page(slug=slugify(self.kwargs['slug']))  # A dummy page object.
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    return MapData(page=dummy)
def get_object(self):
    """Return the region-scoped MapData for this page, or an unsaved one."""
    region = self.get_region()
    page = Page.objects.get(slug=slugify(self.kwargs.get('slug')),
                            region=region)
    existing = MapData.objects.filter(page=page, region=region)
    if existing:
        return existing[0]
    return MapData(page=page, region=region)
def get_object(self):
    """Build a MapData bound to the latest historical version of the page."""
    dummy = Page(slug=slugify(self.kwargs["slug"]))  # A dummy page object.
    latest = dummy.history.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    return MapData(page=dummy)
def handler404(self, request, *args, **kwargs):
    """Region-aware 404 handler for map data: show the 'new map' template."""
    slug = slugify(kwargs.get('slug'))
    try:
        region = self.get_region(request=request, kwargs=kwargs)
    except Http404:
        # Region itself is unknown — show the region 404 page.
        return region_404_response(request, kwargs['region'])
    try:
        page = Page.objects.get(slug=slug, region=region)
    except Page.DoesNotExist:
        page = Page(slug=slug, region=region)  # unsaved stub
    context = {'page': page, 'mapdata': MapData(page=page, region=region)}
    return HttpResponseNotFound(
        render(request, 'maps/mapdata_new.html', context)
    )
def get_context_data(self, *args, **kwargs):
    """Expose the page plus its inbound and outbound links to the template."""
    context = super(LinksForPageView, self).get_context_data(*args, **kwargs)
    page = Page.objects.get(
        slug=slugify(self.kwargs.get('slug')),
        region=self.get_region(),
    )
    context['page'] = page
    context['links_to_page'] = page.links_to_here.all()
    context['links_from_page'] = page.links.all()
    return context
def validate_page_slug(slug):
    """Validate that `slug` is already in canonical (slugified) form.

    Raises:
        ValidationError: if slugify() would change the value, i.e. the
            slug has uppercase letters, stray whitespace, or characters
            outside the allowed set.
    """
    from pages.models import slugify
    if slugify(slug) != slug:
        # FIX: corrected "alphanumber" -> "alphanumeric" in the
        # user-facing message.
        raise ValidationError(
            _('Provided slug is invalid. Slugs must be lowercase, '
              'contain no trailing or leading whitespace, and contain only alphanumeric '
              'characters along with %(KEEP_CHARACTERS)s') % {'KEEP_CHARACTERS': SLUGIFY_KEEP})
def handler404(self, request, *args, **kwargs):
    """Region-aware map 404: render the create-map template for the page."""
    try:
        region = self.get_region(request=request, kwargs=kwargs)
    except Http404:
        return region_404_response(request, kwargs['region'])
    slug = slugify(kwargs.get('slug'))
    try:
        page = Page.objects.get(slug=slug, region=region)
    except Page.DoesNotExist:
        # No such page either; use an unsaved stub for the template.
        page = Page(slug=slug, region=region)
    mapdata = MapData(page=page, region=region)
    body = render(request, 'maps/mapdata_new.html',
                  {'page': page, 'mapdata': mapdata})
    return HttpResponseNotFound(body)
def get_queryset(self):
    """Return all historical versions of the MapData for this page slug."""
    dummy = Page(slug=slugify(self.kwargs['slug']))  # A dummy page object.
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    self.mapdata = MapData(page=dummy)
    return self.mapdata.versions.all()
def get_context_data(self, *args, **kwargs):
    """Add the page and its link relations to the template context."""
    context = super(LinksForPageView, self).get_context_data(
        *args, **kwargs)
    slug = slugify(self.kwargs.get('slug'))
    page = Page.objects.get(slug=slug, region=self.get_region())
    context['page'] = page
    context['links_to_page'] = page.links_to_here.all()
    context['links_from_page'] = page.links.all()
    return context
def get_object(self):
    """Build a region-scoped MapData bound to the page's latest version."""
    region = self.get_region()
    # A dummy page object.
    dummy = Page(slug=slugify(self.kwargs['slug']), region=region)
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    return MapData(page=dummy, region=region)
def clean_name(self):
    """Reject names whose slug collides with a different existing page."""
    name = self.cleaned_data['name']
    try:
        existing = Page.objects.get(slug__exact=slugify(name))
    except Page.DoesNotExist:
        # No collision — the name is available.
        return name
    if self.instance != existing:
        raise forms.ValidationError(
            _('A page with this name already exists'))
    return name
def render_wiki_template(self, name, params):
    """Render the "templates/<name>" page, substituting {{param}} markers.

    Unmatched {{...}} markers are stripped; a missing template renders
    as the empty string.
    """
    template_slug = slugify(u"templates/%s" % name)
    try:
        template = Page.objects.get(slug__exact=template_slug,
                                    region=self.region)
    except Page.DoesNotExist:
        return ""
    text = unicode(template.content)
    for param in params:
        marker = u"{{%s}}" % unicode(param.name)
        text = text.replace(marker, unicode(param.value))
    # Strip any markers that had no matching parameter.
    text = re.compile(u"{{.*?}}").sub(u"", text)
    return text
def clean_name(self):
    """Validate that the chosen name doesn't slug-collide with another page."""
    name = self.cleaned_data['name']
    try:
        other = Page.objects.get(slug__exact=slugify(name))
        if self.instance != other:
            # Same slug, different page: refuse the name.
            raise forms.ValidationError(
                'A page with this name already exists'
            )
    except Page.DoesNotExist:
        pass
    return name
def get_queryset(self):
    """Return the version history of this page's region-scoped MapData."""
    region = self.get_region()
    # A dummy page object.
    dummy = Page(slug=slugify(self.kwargs['slug']), region=region)
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    self.mapdata = MapData(page=dummy, region=region)
    return self.mapdata.versions.all()
def forwards(self, orm):
    """Add slug columns to Link/IncludedPage and backfill from names."""
    from pages.models import slugify

    # Adding field 'Link.destination_slug'
    db.add_column(
        u'links_link', 'destination_slug',
        self.gf('django.db.models.fields.CharField')(
            default='testingo', max_length=255, db_index=True),
        keep_default=False)
    # Adding field 'IncludedPage.included_page_slug'
    db.add_column(
        u'links_includedpage', 'included_page_slug',
        self.gf('django.db.models.fields.CharField')(
            default='testingo', max_length=255, db_index=True),
        keep_default=False)

    links = orm['links.Link'].objects.all().defer('destination', 'source')
    for link in links.iterator():
        link.destination_slug = slugify(link.destination_name)
        link.save()

    includes = orm['links.IncludedPage'].objects.all().defer(
        'source', 'included_page')
    for included in includes.iterator():
        included.included_page_slug = slugify(included.included_page_name)
        included.save()
def get_protected_objects(self):
    """Return the Page and/or Redirect associated with this slug."""
    slug = slugify(self.kwargs['slug'])
    protected = []
    pages = Page.objects.filter(slug=slug)
    if pages:
        protected.append(pages[0])
    redirects = Redirect.objects.filter(source=slug)
    if redirects:
        protected.append(redirects[0])
    return protected
def extract_internal_links(html):
    """
    Args:
        html: A string containing an HTML5 fragment.

    Returns:
        A dictionary of the linked-to page names and the number of times
        that link has been made in this HTML. E.g.
        {'Downtown Park': 3, 'Rollercoaster': 1}
    """
    parser = html5lib.HTMLParser(
        tree=html5lib.treebuilders.getTreeBuilder("lxml"),
        namespaceHTMLElements=False)
    # Wrap to make the tree lookup easier
    tree = parser.parseFragment('<div>%s</div>' % html)[0]
    # Count links per slug, remembering the first-seen display name.
    by_slug = {}
    for anchor in tree.xpath('//a'):
        if 'href' not in anchor.attrib:
            continue
        href = anchor.attrib['href']
        if (_is_absolute(href) or _is_anchor_link(href) or
                _is_plugin(anchor) or _invalid(href)):
            continue
        try:
            slug = slugify(href)
            if slug in by_slug:
                name, count = by_slug[slug]
                by_slug[slug] = (name, count + 1)
            else:
                by_slug[slug] = (url_to_name(href), 1)
        except UnicodeDecodeError:
            pass
    # Format the result correctly.
    return dict(by_slug.itervalues())
def process_context(self, context):
    """Resolve the page to include, recording it for cache dependency tracking."""
    super(IncludePageNode, self).process_context(context)
    try:
        self.page = Page.objects.get(slug__exact=slugify(self.name),
                                     region=self.region)
    except Page.DoesNotExist:
        self.page = None
        return
    # Keep track of the fact this page was included (for caching purposes)
    if 'request' in context:
        request = context['request']
        _depends_on = getattr(request, '_depends_on_header', [])
        _depends_on.append(self.page.id)
        request._depends_on = _depends_on
def get_object(self, request, slug):
    # TODO: Break out this MapData-get-page pattern into a function.
    # Non-DRY.
    dummy = Page(slug=slugify(slug))
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    obj = MapData(page=dummy)
    obj.page = dummy
    obj.title = _('Map for "%s"') % obj.page.name
    obj.slug = dummy.slug
    return obj
def record_page_links(page):
    """Update Link records to match the internal links in `page.content`."""
    region = page.region
    links = extract_internal_links(page.content)
    for pagename, count in links.iteritems():
        pagename_slug = slugify(pagename)
        source_links = Link.objects.filter(source=page, region=region)
        found = (source_links.filter(destination_slug=pagename_slug) |
                 source_links.filter(destination__slug=pagename_slug))
        if found:
            link = found[0]
            if link.count == count:
                # No new links with this name on this page, so skip updating.
                continue
            link.count = count
        else:
            pages = Page.objects.filter(slug=pagename_slug, region=region)
            if pages:
                destination = pages[0]
            else:
                destination = None
            # Exists for some reason already (probably running a script that's moving between regions?)
            if destination and Link.objects.filter(
                    source=page, destination=destination).exists():
                continue
            link = Link(source=page,
                        region=region,
                        destination=destination,
                        destination_name=pagename,
                        destination_slug=pagename_slug,
                        count=count)
        link.save()
def get_object(self):
    """Return the MapData as of the requested version number or date."""
    dummy = Page(slug=slugify(self.kwargs['slug']))  # A dummy page object.
    latest = dummy.versions.most_recent()
    # Need to set the pk on the dummy page for correct MapData lookup.
    dummy.pk = latest.id
    dummy.name = latest.name
    self.page = dummy
    mapdata = MapData(page=dummy)
    version = self.kwargs.get('version')
    date = self.kwargs.get('date')
    if version:
        return mapdata.versions.as_of(version=int(version))
    if date:
        return mapdata.versions.as_of(date=dateparser(date))
def process_response(self, request, response):
    """Region-aware redirect middleware.

    On a 404, extracts the page slug (and region, when on the main
    hostname) from the request path, then follows a matching Redirect if
    one exists in that region.  Custom-domain requests resolve the region
    from the domain instead of the URL.
    """
    if response.status_code != 404:
        # No need to check for a redirect for non-404 responses.
        return response
    # Main-hostname URLs embed the region in the path; custom domains
    # don't, so they use a pattern without a region component.
    if request.META['HTTP_HOST'].endswith(settings.MAIN_HOSTNAME):
        page_routing_pattern = page_routing_pattern_region
    else:
        page_routing_pattern = page_routing_pattern_no_region
    if _is_redirect(request) or _force_show_page(request):
        # Don't double-redirect and allow the page to be
        # force-displayed.
        return response
    r = None
    re_match = page_routing_pattern.match(request.get_full_path())
    if not re_match:
        # Path isn't a page URL; nothing to redirect.
        return response
    slug = slugify(re_match.group('slug'))
    if request.META['HTTP_HOST'].endswith(settings.MAIN_HOSTNAME):
        region_slug = re_match.group('region')
        region = Region.objects.filter(slug=region_slug)
    else:
        # Custom domain: the region is tied to the host.
        region = Region.objects.filter(regionsettings__domain=request.META['HTTP_HOST'])
    if not region:
        return response
    region = region[0]
    try:
        r = Redirect.objects.get(source=slug, region=region)
    except Redirect.DoesNotExist:
        pass
    if r is not None:
        return HttpResponseRedirect(
            r.destination.get_absolute_url() + '?&redirected_from=%s' % slug
        )
    # No redirect was found. Return the response.
    return response
def get_user_page(user, request):
    """
    Hacky heuristics for picking the underlying Page that holds the
    userpage content.

    TODO: Make this all belong the a single administrative region,
          'users', once we have a notifications framework in place.
    """
    from pages.models import Page, slugify
    pagename = "Users/%s" % user.username
    user_pages = Page.objects.filter(slug=slugify(pagename))
    if user_pages:
        # Just pick the first one
        return user_pages[0]
    else:
        # Check to see if they've edited a region recently
        edited_pages = Page.versions.filter(version_info__user=user)
        referer = request.META.get('HTTP_REFERER')
        if edited_pages.exists():
            region = edited_pages[0].region
            return Page(name=pagename, region=region)
        # Let's try and guess by the previous URL. Ugh!
        if referer:
            urlparts = urllib_parse.urlparse(referer)
            # Is this host us?
            for host in settings.ALLOWED_HOSTS:
                if urlparts.netloc.endswith(host):
                    # BUG FIX: was `parts.path` — `parts` is undefined
                    # (NameError at runtime); the parsed result is
                    # `urlparts`.
                    pathparts = urlparts.path.split('/')
                    # Is the path in a region?
                    if len(pathparts) > 1 and Region.objects.filter(
                            slug=pathparts[1]).exists():
                        return Page(
                            name=pagename,
                            region=Region.objects.get(slug=pathparts[1]))
        # Put it in the main region for now :/
        return Page(name=pagename, region=get_main_region())
def forwards(self, orm):
    """Data migration: create Link rows for every internal link in
    existing page content (duplicate of the sibling migration, with
    line-wrapped formatting in the original)."""
    from pages.models import slugify
    from links import extract_internal_links
    for page in orm['pages.Page'].objects.all().iterator():
        region = page.region
        links = extract_internal_links(page.content)
        print "..recording page links on %s" % smart_str(page.name)
        for pagename, count in links.iteritems():
            # Resolve the link target within this page's region.
            page_exists = orm['pages.Page'].objects.filter(
                slug=slugify(pagename), region=region)
            if page_exists:
                destination = page_exists[0]
            else:
                destination = None
            # Already recorded against this exact destination (or None).
            if orm.Link.objects.filter(source=page,
                                       destination=destination).exists():
                continue
            if orm.Link.objects.filter(
                    source=page, destination_name__iexact=pagename).exists():
                # Case-insensitive name match: re-link it if we resolved
                # a real destination page.
                if destination:
                    link = orm.Link.objects.filter(
                        source=page, destination_name__iexact=pagename)[0]
                    link.destination = destination
                    link.save()
            else:
                # First record of this link for this page.
                link = orm.Link(
                    source=page,
                    region=region,
                    destination=destination,
                    destination_name=pagename,
                    count=count,
                )
                link.save()
def render(self, context):
    """Render an included page's content inline, guarding against
    endless include loops and missing pages.

    The included page's HTML (optionally preceded by a linked title) is
    converted to template text and rendered with `page` temporarily
    swapped in the context; the original context is restored afterwards.
    """
    try:
        try:
            page = Page.objects.get(slug__exact=slugify(self.page_name))
            header = ''
            if 'showtitle' in self.args:
                header = ('<h2><a href="%s">%s</a></h2>' %
                          (page.pretty_slug, page.name))
            content = header + page.content
            # prevent endless loops
            context_page = context['page']
            include_stack = context.get('_include_stack', [])
            include_stack.append(context_page.name)
            if page.name in include_stack:
                # Including a page already on the stack would recurse
                # forever; emit an error marker instead.
                content = ('<p class="plugin includepage">Unable to'
                           ' include <a href="%s">%s</a>: endless include'
                           ' loop.</p>' % (self.page_name, self.page_name))
            context['_include_stack'] = include_stack
            context['page'] = page
            template_text = html_to_template_text(content, context)
            # restore context
            context['_include_stack'].pop()
            context['page'] = context_page
        except Page.DoesNotExist:
            # Included page missing: render a "missing link" marker.
            page_url = reverse('pages:show',
                               args=[name_to_url(self.page_name)])
            template_text = ('<p class="plugin includepage">Unable to'
                             ' include <a href="%s" class="missing_link">%s</a></p>'
                             % (page_url, self.page_name))
        template = Template(template_text)
        return self.render_template(template, context)
    except:
        # NOTE(review): swallows all errors outside template debug mode
        # so a broken include never breaks the enclosing page.
        if settings.TEMPLATE_DEBUG:
            raise
        return ''