def entry_detail(request, year, month, day, page_id, slug):
    """Display all text chunks for a single Congressional Record page.

    Pages through the CapitolWords API until a short page signals the end,
    sorts the chunks into document order, and renders them alongside
    similar entries.  Raises Http404 when the API returns no chunks for
    the given date/page_id (previously this crashed with an IndexError).
    """
    date = datetime.date(year=int(year), month=int(month), day=int(day))
    chunks = []
    page = 0
    while True:
        # per_page=1000: a response shorter than the page size means this
        # was the final page of results.
        response = capitolwords.text(date=date, page_id=page_id,
                                     sort='id asc', per_page=1000, page=page)
        chunks += response
        if len(response) < 1000:
            break
        page += 1
    if not chunks:
        # An unknown date/page combination should be a 404, not a 500
        # from indexing an empty list below.
        raise Http404
    chunks.sort(key=itemgetter('order'))
    similar_entries = get_similar_entries(chunks)
    metadata = chunks[0]
    year, month, day = [int(x) for x in metadata['date'].split('-')]
    metadata['date'] = datetime.date(year=year, month=month, day=day)
    return render_to_response('cwod/entry_detail.html',
                              {'date': date,
                               'page_id': page_id,
                               'chunks': chunks,
                               'metadata': metadata,
                               #'entries': entries_for_date(date),
                               'similar_entries': similar_entries,
                               },
                              context_instance=RequestContext(request))
def entry_detail(request, year, month, day, page_id, slug):
    """Render the detail view for one Congressional Record page of text."""
    date = datetime.date(int(year), int(month), int(day))
    chunks = []
    page_num = 0
    more = True
    # Fetch pages of up to 1000 chunks until a short page ends the stream.
    while more:
        batch = capitolwords.text(date=date, page_id=page_id, sort='id asc',
                                  per_page=1000, page=page_num)
        chunks += batch
        more = len(batch) >= 1000
        page_num += 1
    chunks.sort(key=itemgetter('order'))
    similar_entries = get_similar_entries(chunks)
    metadata = chunks[0]
    # Re-parse the metadata date string ('YYYY-MM-DD') into a date object.
    y, m, d = [int(part) for part in metadata['date'].split('-')]
    metadata['date'] = datetime.date(y, m, d)
    context = {
        'date': date,
        'page_id': page_id,
        'chunks': chunks,
        'metadata': metadata,
        #'entries': entries_for_date(date),
        'similar_entries': similar_entries,
    }
    return render_to_response('cwod/entry_detail.html', context,
                              context_instance=RequestContext(request))
def entries_for_date(date, **kwargs):
    """Group Congressional Record entries for *date* by chamber.

    Pages through the CapitolWords API (50 entries per page) and buckets each
    entry under its chamber, keyed by (title, pages, origin_url, first
    speaking paragraph), accumulating the set of speaker last names.

    Returns a list of (chamber_name, sorted_entry_pairs) tuples, or [] when
    the date has no entries at all.
    """
    page = 0
    chambers = {
        'Extensions': defaultdict(set),
        'House': defaultdict(set),
        'Senate': defaultdict(set),
    }
    has_entries = False
    while True:
        response = capitolwords.text(date=date, sort='id desc', page=page,
                                     **kwargs)
        if response and not has_entries:
            has_entries = True
        for entry in response:
            try:
                try:
                    # 'speaking' may be absent; fall back to an empty excerpt.
                    key = (entry['title'], entry['pages'],
                           entry['origin_url'], entry['speaking'][0])
                except KeyError:
                    key = (entry['title'], entry['pages'],
                           entry['origin_url'], '')
                chambers[entry['chamber']][key].add(entry['speaker_last'])
            except KeyError:
                # BUG FIX: the original had two `except KeyError` clauses on
                # a single try, so this skip branch was unreachable and a
                # missing 'chamber' (or other required field) crashed the
                # view.  Malformed entries are now skipped as intended.
                continue
        if len(response) < 50:
            break
        page += 1
    if not has_entries:
        return []
    # Rename the 'Extensions' bucket to its display name.
    chambers['Extensions of Remarks'] = chambers.pop('Extensions')
    for k, v in chambers.items():
        # Sort each chamber's entries by their 'pages' value (key index 1).
        # key= replaces the old Python-2-only cmp comparator; ordering is
        # identical, and items() avoids mutating while using iteritems().
        chambers[k] = sorted(v.items(), key=lambda item: item[0][1])
    return chambers.items()
def legislator_detail(request, bioguide_id, slug=None):
    """Profile page for a legislator: top phrases, recent entries, and
    similar members."""
    legislator = legislator_lookup(bioguide_id)
    if not legislator:
        raise Http404
    # Canonicalize the URL: permanently redirect if the slug is stale.
    if legislator['slug'] != slug:
        canonical = reverse('cwod_legislator_detail',
                            kwargs={'bioguide_id': bioguide_id,
                                    'slug': legislator['slug']})
        return HttpResponsePermanentRedirect(canonical)
    similar_legislators = []
    for match in get_similar_entities('bioguide', bioguide_id)[:10]:
        match['legislator'] = legislator_lookup(match['bioguide'])
        similar_legislators.append(match)
    # Top phrases for each n-gram size (unigrams through 5-grams).
    ngrams = {}
    for size in range(1, 6):
        ngrams[GRAM_NAMES[size - 1]] = capitolwords.top_phrases(
            entity_type='legislator',
            entity_value=bioguide_id,
            n=size,
            per_page=30)
    ngrams = ngrams.iteritems()
    entries = capitolwords.text(bioguide_id=bioguide_id, sort='date desc',
                                per_page=5)
    context = {
        'legislator': legislator,
        'current_congress': get_current_congress(),
        'similar_legislators': similar_legislators,
        'entries': entries,
        'ngrams': ngrams,
    }
    return render_to_response('cwod/legislator_detail.html', context,
                              context_instance=RequestContext(request))
def state_detail(request, state):
    """Profile page for a state or territory: top phrases, similar states,
    and the current congressional delegation grouped by chamber."""
    state_name = dict(STATE_CHOICES + TERRITORY_CHOICES).get(state)
    if not state_name:
        raise Http404
    entries = capitolwords.text(state='"%s"' % state,
                                sort='date desc,score desc', per_page=5)
    # Top phrases for every n-gram length from 1 through 5.
    ngrams = {}
    for size in range(1, 6):
        ngrams[GRAM_NAMES[size - 1]] = capitolwords.top_phrases(
            entity_type='state', entity_value=state, n=size, per_page=30)
    ngrams = ngrams.iteritems()
    similar_states = get_similar_entities('state', state)
    # legislators = LegislatorRole.objects.filter(state=state, end_date__gte=datetime.date.today())
    legislators = capitolwords.legislators(state=state,
                                           congress=get_current_congress())

    def district_number(member):
        # Non-numeric districts (e.g. senators' seats) sort as 0.
        try:
            return int(member['district'])
        except ValueError:
            return 0

    legislators = sorted(legislators, key=district_number)
    bodies = {'House': [], 'Senate': []}
    for member in legislators:
        slot = 'Senate' if member['chamber'] == 'Senate' else 'House'
        bodies[slot].append(member)
    # Reverse-alphabetical puts the Senate before the House.
    bodies = sorted(bodies.items(), key=itemgetter(0), reverse=True)
    context = {
        'state': state,
        'state_name': state_name,
        'entries': entries,
        'ngrams': ngrams,
        #'other_states': other_states,
        'similar_states': similar_states,
        'bodies': bodies,
    }
    return render_to_response('cwod/state_detail.html', context,
                              context_instance=RequestContext(request))
def state_detail(request, state):
    """Render the state/territory detail page (phrases, delegation, entries)."""
    state_name = dict(STATE_CHOICES + TERRITORY_CHOICES).get(state)
    if not state_name:
        raise Http404
    entries = capitolwords.text(state='"%s"' % state,
                                sort='date desc,score desc', per_page=5)
    # One top-phrases list per n-gram size, keyed by its display name.
    ngrams = {}
    for n in range(1, 6):
        ngrams[GRAM_NAMES[n - 1]] = capitolwords.top_phrases(
            entity_type='state', entity_value=state, n=n, per_page=30)
    ngrams = ngrams.iteritems()
    similar_states = get_similar_entities('state', state)
    # legislators = LegislatorRole.objects.filter(state=state, end_date__gte=datetime.date.today())
    legislators = capitolwords.legislators(state=state,
                                           congress=get_current_congress())

    def by_district(member):
        # Senators and other non-numeric districts sort first (as 0).
        try:
            return int(member['district'])
        except ValueError:
            return 0

    legislators = sorted(legislators, key=by_district)
    # Split the delegation by chamber; anything not 'Senate' is the House.
    senate = [m for m in legislators if m['chamber'] == 'Senate']
    house = [m for m in legislators if m['chamber'] != 'Senate']
    bodies = [('Senate', senate), ('House', house)]
    return render_to_response('cwod/state_detail.html',
                              {'state': state,
                               'state_name': state_name,
                               'entries': entries,
                               'ngrams': ngrams,
                               #'other_states': other_states,
                               'similar_states': similar_states,
                               'bodies': bodies,
                               },
                              context_instance=RequestContext(request))
def faster_term_detail(request, term):
    """Lightweight term page: canonicalizes messy term URLs, then renders
    only the recent entries (charts load client-side via JS)."""
    # For better URLs, replace spaces with underscores and strip trailing
    # punctuation.
    term = term.strip('"\' ')
    trailing_punct = r'[%s]$' % ''.join(PUNCTUATION)
    if re.search(r'\s', term) or re.search(trailing_punct, term):
        term = re.sub(trailing_punct, '', term)
        term = re.sub(r'\s', '_', term.strip())
        return HttpResponsePermanentRedirect(
            reverse('cwod_term_detail', kwargs={'term': term}))
    # Clients that flagged no-JS get the full server-rendered page instead.
    if request.GET.get('js') == 'false':
        return term_detail(request, term)
    term = re.sub(r'_', ' ', term)
    # Recent entries, keeping only the first hit per Congressional Record page.
    all_entries = capitolwords.text(phrase=term, bioguide_id="['' TO *]",
                                    sort='date desc,score desc', per_page=50)
    entries = []
    seen_urls = []
    for entry in all_entries:
        url = entry['origin_url']
        if url not in seen_urls:
            seen_urls.append(url)
            entries.append(entry)
    entries = _highlight_entries(entries, term)
    uri = request.build_absolute_uri()
    no_js_uri = uri + ('&js=false' if '?' in uri else '?js=false')
    context = {
        'term': term,
        'entries': entries,
        'needs_js': True,
        'no_js_uri': no_js_uri,
        'state_choices': US_STATES + US_TERRITORIES,
    }
    return render_to_response('cwod/term_detail.html', context,
                              context_instance=RequestContext(request))
def entries_for_date(date, **kwargs):
    """Collect and group a date's Congressional Record entries by chamber.

    Walks the paginated CapitolWords text API (50 per page), bucketing each
    entry under its chamber with a (title, pages, origin_url, excerpt) key
    and a set of speaker last names as the value.

    Returns [] when the date has no entries, otherwise a list of
    (chamber_name, entry_pairs) tuples with entry_pairs sorted by pages.
    """
    page = 0
    chambers = {
        'Extensions': defaultdict(set),
        'House': defaultdict(set),
        'Senate': defaultdict(set),
    }
    has_entries = False
    while True:
        response = capitolwords.text(date=date, sort='id desc', page=page,
                                     **kwargs)
        if response and not has_entries:
            has_entries = True
        for entry in response:
            # BUG FIX: the original stacked two `except KeyError` clauses on
            # one try, so the trailing `continue` was dead code and a missing
            # required field (e.g. 'chamber') raised out of the view.  Test
            # for the optional 'speaking' field explicitly and guard the
            # rest, skipping malformed entries.
            if 'speaking' in entry:
                excerpt = entry['speaking'][0]
            else:
                excerpt = ''
            try:
                key = (entry['title'], entry['pages'],
                       entry['origin_url'], excerpt)
                chambers[entry['chamber']][key].add(entry['speaker_last'])
            except KeyError:
                continue
        if len(response) < 50:
            break
        page += 1
    if not has_entries:
        return []
    # Expose the 'Extensions' bucket under its full display name.
    chambers['Extensions of Remarks'] = chambers.pop('Extensions')
    for k, v in chambers.items():
        # Sort a chamber's entries by the 'pages' component of the key;
        # key= replaces the Python-2-only cmp comparator with identical order.
        chambers[k] = sorted(v.items(), key=lambda item: item[0][1])
    return chambers.items()
def faster_term_detail(request, term):
    """JS-enhanced term detail view.

    Normalizes the term in the URL (redirecting permanently if it changes),
    falls back to the full term_detail view when JS is disabled, and
    otherwise renders just the deduplicated recent entries.
    """
    # For better URLs, replace spaces with underscores and strip trailing
    # punctuation.
    term = term.strip('"\' ')
    if re.search(r'\s', term) or re.search(r'[%s]$' % ''.join(PUNCTUATION),
                                           term):
        term = re.sub(r'[%s]$' % ''.join(PUNCTUATION), '', term)
        term = re.sub(r'\s', '_', term.strip())
        target = reverse('cwod_term_detail', kwargs={'term': term})
        return HttpResponsePermanentRedirect(target)
    if request.GET.get('js') == 'false':
        return term_detail(request, term)
    term = re.sub(r'_', ' ', term)
    # Recent entries
    all_entries = capitolwords.text(phrase=term, bioguide_id="['' TO *]",
                                    sort='date desc,score desc', per_page=50)
    # Only show one entry for each Congressional Record page.
    entries = []
    page_urls = []
    for item in all_entries:
        if item['origin_url'] in page_urls:
            continue
        page_urls.append(item['origin_url'])
        entries.append(item)
    entries = _highlight_entries(entries, term)
    uri = request.build_absolute_uri()
    if '?' in uri:
        no_js_uri = uri + '&js=false'
    else:
        no_js_uri = uri + '?js=false'
    return render_to_response('cwod/term_detail.html',
                              {'term': term,
                               'entries': entries,
                               'needs_js': True,
                               'no_js_uri': no_js_uri,
                               'state_choices': US_STATES + US_TERRITORIES,
                               },
                              context_instance=RequestContext(request))
def legislator_detail(request, bioguide_id, slug=None):
    """Show one legislator's detail page, redirecting to the canonical slug."""
    legislator = legislator_lookup(bioguide_id)
    if not legislator:
        raise Http404
    if legislator['slug'] != slug:
        # Stale or missing slug: issue a permanent redirect to the real one.
        return HttpResponsePermanentRedirect(
            reverse('cwod_legislator_detail',
                    kwargs={'bioguide_id': bioguide_id,
                            'slug': legislator['slug']}))
    # Attach full legislator records to the ten most similar speakers.
    similar_legislators = get_similar_entities('bioguide', bioguide_id)[:10]
    for item in similar_legislators:
        item['legislator'] = legislator_lookup(item['bioguide'])
    # One top-phrases list per n-gram length, 1 through 5.
    ngrams = {}
    for n in range(1, 6):
        ngrams[GRAM_NAMES[n - 1]] = capitolwords.top_phrases(
            entity_type='legislator', entity_value=bioguide_id, n=n,
            per_page=30)
    ngrams = ngrams.iteritems()
    entries = capitolwords.text(bioguide_id=bioguide_id, sort='date desc',
                                per_page=5)
    return render_to_response('cwod/legislator_detail.html',
                              {'legislator': legislator,
                               'current_congress': get_current_congress(),
                               'similar_legislators': similar_legislators,
                               'entries': entries,
                               'ngrams': ngrams,
                               },
                              context_instance=RequestContext(request))
def term_detail(request, term):
    """Full detail page for a term: usage timelines, party pie chart, top
    speakers, state popularity, and recent Congressional Record entries."""
    # For better URLs, replace spaces with underscores & strip trailing
    # punctuation, as well as bordering quotation marks.
    term = term.strip('"\' ')
    trailing_punct = r'[%s]$' % ''.join(PUNCTUATION)
    if re.search(r'\s', term) or re.search(trailing_punct, term):
        term = re.sub(trailing_punct, '', term)
        term = re.sub(r'\s', '_', term.strip())
        return HttpResponsePermanentRedirect(
            reverse('cwod_term_detail', kwargs={'term': term}))
    term = re.sub(r'_', ' ', term)
    stem = request.GET.get('stem', 'false')
    # Overall usage timeline, then the same chart split by party.
    timeline_params = {
        'phrase': term,
        'granularity': 'month',
        'percentages': 'true',
        #'smoothing': 4,
        'mincount': 0,
        'stem': stem,
        'legend': 'false',
    }
    timeline_url = capitolwords.timeline(**timeline_params)
    timeline_params['split_by_party'] = 'true'
    timeline_params['legend'] = 'true'
    party_timeline_url = capitolwords.timeline(**timeline_params)
    # Custom timeline, filtered by any of party / state / bioguide_id.
    custom_timeline_url = timeline_url
    party = request.GET.get('party')
    state = request.GET.get('state')
    bioguide_id = request.GET.get('bioguide_id')
    if party or state or bioguide_id:
        try:
            custom_timeline_url = capitolwords.timeline(
                phrase=term,
                granularity='month',
                percentages='true',
                mincount=0,
                party=party,
                state=state,
                bioguide_id=bioguide_id,
                stem=stem)
        except ApiError:
            custom_timeline_url = 'error'
    """
    popular_dates = sorted(capitolwords.phrase_by_date_range(phrase=term, per_page=15, sort='count', percentages='true'), key=itemgetter('percentage'), reverse=True)[:10]
    popular_dates = [dateparse(x['day']).date() for x in popular_dates]
    """
    # Word tree
    """
    tree = capitolwords.wordtree(phrase=term)
    tree = [x for x in tree if not re.search(r' (%s)$' % '|'.join(STOPWORDS), x['phrase']) and x['count'] > 1]
    """
    # Party pie chart
    party_pie_url = capitolwords.piechart(phrase=term, entity_type='party',
                                          labels='true')
    # Legislators who use this term most; attach each one's latest role.
    legislators = capitolwords.phrase_by_entity_type('legislator',
                                                     phrase=term,
                                                     sort='count',
                                                     per_page=10)
    for row in legislators:
        row['legislator'] = LegislatorRole.objects.filter(
            bioguide_id=row['legislator']).order_by(
            '-congress').select_related()[0]
    # Popularity by state
    states = capitolwords.phrase_by_entity_type('state', phrase=term,
                                                sort='count', per_page=10)
    # Recent entries, deduplicated to one per Congressional Record page.
    all_entries = capitolwords.text(phrase=term, bioguide_id="['' TO *]",
                                    sort='date desc,score desc', per_page=50)
    entries = []
    seen_urls = set()
    for entry in all_entries:
        if entry['origin_url'] not in seen_urls:
            seen_urls.add(entry['origin_url'])
            entries.append(entry)
    entries = _highlight_entries(entries, term)
    context = {
        'term': term,
        'timeline_url': timeline_url['url'],
        'party_timeline_url': party_timeline_url['url'],
        'custom_timeline_url': custom_timeline_url,
        #'popular_dates': popular_dates,
        'party_pie_url': party_pie_url['url'],
        'legislators': legislators,
        'states': states,
        #'tree': tree,
        'entries': entries,
        'search': request.GET.get('search') == '1',
        'state_choices': US_STATES + US_TERRITORIES,
    }
    return render_to_response('cwod/term_detail.html', context,
                              context_instance=RequestContext(request))
def term_detail(request, term):
    """Server-rendered term page with timelines, charts, and entries."""
    # For better URLs, replace spaces with underscores & strip trailing
    # punctuation, as well as bordering quotation marks.
    term = term.strip('"\' ')
    if re.search(r'\s', term) or re.search(r'[%s]$' % ''.join(PUNCTUATION),
                                           term):
        term = re.sub(r'[%s]$' % ''.join(PUNCTUATION), '', term)
        term = re.sub(r'\s', '_', term.strip())
        url = reverse('cwod_term_detail', kwargs={'term': term})
        return HttpResponsePermanentRedirect(url)
    term = re.sub(r'_', ' ', term)
    stem = request.GET.get('stem', 'false')
    # Timeline: the plain chart first, then one split by party.
    timeline_kwargs = {
        'phrase': term,
        'granularity': 'month',
        'percentages': 'true',
        #'smoothing': 4,
        'mincount': 0,
        'stem': stem,
        'legend': 'false',
    }
    timeline_url = capitolwords.timeline(**timeline_kwargs)
    timeline_kwargs.update({'split_by_party': 'true', 'legend': 'true'})
    party_timeline_url = capitolwords.timeline(**timeline_kwargs)
    # Optional custom timeline when the user filters the chart.
    custom_timeline_url = timeline_url
    party = request.GET.get('party')
    state = request.GET.get('state')
    bioguide_id = request.GET.get('bioguide_id')
    if party or state or bioguide_id:
        try:
            custom_timeline_url = capitolwords.timeline(**{
                'phrase': term,
                'granularity': 'month',
                'percentages': 'true',
                'mincount': 0,
                'party': party,
                'state': state,
                'bioguide_id': bioguide_id,
                'stem': stem,
            })
        except ApiError:
            # The template treats the literal string 'error' as a failure flag.
            custom_timeline_url = 'error'
    """
    popular_dates = sorted(capitolwords.phrase_by_date_range(phrase=term, per_page=15, sort='count', percentages='true'), key=itemgetter('percentage'), reverse=True)[:10]
    popular_dates = [dateparse(x['day']).date() for x in popular_dates]
    """
    # Word tree
    """
    tree = capitolwords.wordtree(phrase=term)
    tree = [x for x in tree if not re.search(r' (%s)$' % '|'.join(STOPWORDS), x['phrase']) and x['count'] > 1]
    """
    # Party pie chart
    party_pie_url = capitolwords.piechart(phrase=term, entity_type='party',
                                          labels='true')
    # Commonly said by these legislators; look up each speaker's newest role.
    legislators = capitolwords.phrase_by_entity_type('legislator',
                                                     phrase=term,
                                                     sort='count',
                                                     per_page=10)
    for speaker in legislators:
        roles = LegislatorRole.objects.filter(
            bioguide_id=speaker['legislator'])
        speaker['legislator'] = roles.order_by('-congress').select_related()[0]
    # Popularity by state
    states = capitolwords.phrase_by_entity_type('state', phrase=term,
                                                sort='count', per_page=10)
    # Recent entries
    all_entries = capitolwords.text(phrase=term, bioguide_id="['' TO *]",
                                    sort='date desc,score desc', per_page=50)
    # Only show one entry for each Congressional Record page.
    entries = []
    urls = []
    for item in all_entries:
        if item['origin_url'] in urls:
            continue
        urls.append(item['origin_url'])
        entries.append(item)
    entries = _highlight_entries(entries, term)
    return render_to_response('cwod/term_detail.html',
                              {'term': term,
                               'timeline_url': timeline_url['url'],
                               'party_timeline_url': party_timeline_url['url'],
                               'custom_timeline_url': custom_timeline_url,
                               #'popular_dates': popular_dates,
                               'party_pie_url': party_pie_url['url'],
                               'legislators': legislators,
                               'states': states,
                               #'tree': tree,
                               'entries': entries,
                               'search': request.GET.get('search') == '1',
                               'state_choices': US_STATES + US_TERRITORIES,
                               },
                              context_instance=RequestContext(request))