def test_get_urls(self):
    # get_urls() should pick out every URL embedded in free-form text,
    # in the order the URLs appear, ignoring the surrounding words.
    sample = (
        " Some junk http://airmozilla/manage/events/1068/ stuff "
        "https://etherpad.mozilla.org/sumo-mobile "
    )
    expected = [
        'http://airmozilla/manage/events/1068/',
        'https://etherpad.mozilla.org/sumo-mobile',
    ]

    found = list(scraper.get_urls(sample))

    eq_(found, expected)
def handle(self, *args, **options):
    """Print the scraped text for every event named on the command line.

    Each positional argument is interpreted as one of:

    * an all-digit string, used as the event's primary key;
    * a URL (anything containing ``://``), in which case the first
      segment of its path is used as the event slug;
    * anything else, used verbatim as the event slug.

    Raises ``CommandError`` (carrying ``self.args``) when no positional
    arguments are supplied.
    """
    if not args:
        raise CommandError(self.args)

    for arg in args:
        if arg.isdigit():
            lookup = {'pk': arg}
        elif '://' in arg:
            # For a path like "/some-slug/", split('/') yields
            # ['', 'some-slug', ''], so index 1 is the first segment.
            lookup = {'slug': urlparse(arg).path.split('/')[1]}
        else:
            lookup = {'slug': arg}

        event = Event.objects.get(**lookup)
        links = get_urls(event.additional_links)
        # A single parenthesised argument prints identically under the
        # Python 2 print statement.
        print(scrape_urls(links)['text'])
def event_transcript(request, id):
    """Display and save the transcript form for a single event.

    The event is fetched with ``get_object_or_404(Event, id=id)``.

    * POST: binds ``EventTranscriptForm`` to the submitted data; when the
      form is valid it is saved, a success message is queued and the
      response redirects to ``manage:event_edit`` for this event. An
      invalid form falls through and is re-rendered.
    * Any other method: if one or more ``urls`` query parameters are
      present they are passed to ``scrape_urls``; non-empty scraped text
      becomes the initial ``transcript`` value, and every result whose
      ``worked`` flag is falsy is reported in a single error message.

    The rendered template receives the event, its ``AmaraVideo`` queryset,
    the form, and the URLs found in ``event.additional_links``.
    """
    event = get_object_or_404(Event, id=id)
    from airmozilla.manage.scraper import get_urls, scrape_urls
    scrapeable_urls = list(get_urls(event.additional_links))

    if request.method == 'POST':
        form = forms.EventTranscriptForm(instance=event, data=request.POST)
        if form.is_valid():
            form.save()
            messages.success(request, 'Event transcript saved.')
            return redirect('manage:event_edit', event.pk)
    else:
        initial = {}
        requested_urls = request.GET.getlist('urls')
        if requested_urls:
            scraped = scrape_urls(requested_urls)
            if scraped['text']:
                initial['transcript'] = scraped['text']

            failures = [
                '%s: %s' % (result['url'], result['status'])
                for result in scraped['results']
                if not result['worked']
            ]
            if failures:
                # Lead with a summary line, then one line per failed URL.
                report = ['Some things could not be scraped correctly']
                report.extend(failures)
                messages.error(request, '\n'.join(report))

        form = forms.EventTranscriptForm(instance=event, initial=initial)

    context = {
        'event': event,
        'amara_videos': AmaraVideo.objects.filter(event=event),
        'form': form,
        'scrapeable_urls': scrapeable_urls,
    }
    return render(request, 'manage/event_transcript.html', context)
def test_get_urls(self):
    # get_urls() should return the URLs in order of appearance and must
    # not include the periods that trail some of them in the text
    # ("urlparse.html..", "airmozilla........", "devtools/.").
    sample = (
        " Some junk http://airmozilla/manage/events/1068/ stuff "
        "https://etherpad.mozilla.org/sumo-mobile hello, this is madness "
        "https://docs.python.org/2/library/urlparse.html.. madness I say "
        "https://github.com/mozilla/airmozilla........ yes "
        "http://blog.mozilla.org/devtools/. "
    )
    expected = [
        'http://airmozilla/manage/events/1068/',
        'https://etherpad.mozilla.org/sumo-mobile',
        'https://docs.python.org/2/library/urlparse.html',
        'https://github.com/mozilla/airmozilla',
        'http://blog.mozilla.org/devtools/',
    ]

    found = list(scraper.get_urls(sample))

    eq_(found, expected)
def test_get_urls(self):
    # Expected results, in the order the URLs occur in the text below.
    # The sentence-ending periods after the last three URLs are not part
    # of the expected values.
    wanted = [
        'http://airmozilla/manage/events/1068/',
        'https://etherpad.mozilla.org/sumo-mobile',
        'https://docs.python.org/2/library/urlparse.html',
        'https://github.com/mozilla/airmozilla',
        'http://blog.mozilla.org/devtools/',
    ]
    blob = (
        " Some junk http://airmozilla/manage/events/1068/ stuff "
        "https://etherpad.mozilla.org/sumo-mobile hello, this is madness "
        "https://docs.python.org/2/library/urlparse.html.. madness I say "
        "https://github.com/mozilla/airmozilla........ yes "
        "http://blog.mozilla.org/devtools/. "
    )

    extracted = list(scraper.get_urls(blob))

    eq_(extracted, wanted)