def save_activity(self):
    """Save activity items for every politician who spoke in this document.

    Only non-procedural statements are considered. For each politician the
    statements are grouped by topic, and the longest statement on a topic
    supplies the excerpt text and URL for that topic.

    * ``Document.DEBATE``: one ``statement`` activity is saved per topic.
    * ``Document.EVIDENCE``: one ``committee`` activity is saved, and only
      when the politician's statements total at least 80 words.
    """
    statements = self.statement_set.filter(procedural=False).select_related('member', 'politician')
    politicians = set(s.politician for s in statements if s.politician)
    for pol in politicians:
        # topic -> [slug of first statement, longest text so far, URL of that text]
        topics = {}
        wordcount = 0
        for statement in (s for s in statements if s.politician == pol):
            wordcount += statement.wordcount
            text = statement.text_plain()
            if statement.topic not in topics:
                topics[statement.topic] = [statement.slug, text, statement.get_absolute_url()]
            elif len(text) > len(topics[statement.topic][1]):
                # If our statement is longer than the previous statement on
                # this topic, use its text for the excerpt.
                topics[statement.topic][1] = text
                topics[statement.topic][2] = statement.get_absolute_url()

        for topic in topics:
            # Index by key rather than ``topics.values()[0]``: dict views are
            # not subscriptable on Python 3.
            _slug, text, url = topics[topic]
            if self.document_type == Document.DEBATE:
                activity.save_activity({
                    'topic': topic,
                    'url': url,
                    'text': text,
                }, politician=pol, date=self.date, guid='statement_%s' % url, variety='statement')
            elif self.document_type == Document.EVIDENCE:
                # Invariant relied on below: an evidence document yields a
                # single topic per politician.
                assert len(topics) == 1
                if wordcount < 80:
                    continue
                activity.save_activity({
                    'meeting': self.committeemeeting,
                    'committee': self.committeemeeting.committee,
                    'text': text,
                    'url': url,
                    'wordcount': wordcount,
                }, politician=pol, date=self.date, guid='cmte_%s' % url, variety='committee')
def save_tweets():
    """Save recent tweets from the ``openparlca/mps`` Twitter list as activity.

    Politicians are matched by numeric Twitter user ID (``twitter_id`` info
    rows). Up to 200 statuses are requested, excluding retweets. A status
    whose author has no matching politician is logged and skipped. When an
    author's screen name is not among the stored ``twitter`` values, the
    politician's ``twitter`` info is updated to the new name.
    """
    twitter_to_pol = dict(
        (int(i.value), i.politician)
        for i in PoliticianInfo.objects.filter(schema='twitter_id').select_related('politician')
    )
    screen_names = set(PoliticianInfo.objects.filter(schema='twitter').values_list('value', flat=True))
    twit = twitter.Twitter(
        auth=twitter.OAuth(**settings.TWITTER_OAUTH),
        domain='api.twitter.com/1.1',
    )
    statuses = twit.lists.statuses(slug='mps', owner_screen_name='openparlca', include_rts=False, count=200)
    # NOTE(review): presumably the API returns newest first, so this processes
    # the oldest status first -- confirm.
    statuses.reverse()
    for status in statuses:
        user = status['user']
        try:
            pol = twitter_to_pol[user['id']]
        except KeyError:
            logger.error("Can't find twitter ID %s (name %s)", user['id'], user['screen_name'])
            continue
        if user['screen_name'] not in screen_names:
            # Changed screen name
            pol.set_info('twitter', user['screen_name'])
            # Remember it so later statuses by the same user don't rewrite it.
            screen_names.add(user['screen_name'])
        # created_at is parsed with the RFC 2822 date parser, converted to a
        # POSIX timestamp, and reduced to a local calendar date.
        date = datetime.date.fromtimestamp(
            email.utils.mktime_tz(email.utils.parsedate_tz(status['created_at']))
        )
        guid = 'twit_%s' % status['id']
        # Twitter apparently escapes < > but not & "
        # so unescape lt and gt here; the template re-escapes the text.
        text = status['text'].replace('&lt;', '<').replace('&gt;', '>')
        activity.save_activity({'text': text}, politician=pol, date=date, guid=guid, variety='twitter')
def save(self, *args, **kwargs):
    """Save this member vote; record an activity item if it had no pk beforehand.

    All positional and keyword arguments are passed through to the parent
    ``save``.
    """
    # Sample the pk before delegating, so the check reflects the state the
    # object was in when save() was called.
    had_no_pk = not self.pk
    super(MemberVote, self).save(*args, **kwargs)
    if not had_no_pk:
        return
    politician = self.politician
    vote_date = self.votequestion.date
    activity.save_activity(self, politician=politician, date=vote_date)
def save_tweets():
    """Save the recent tweets of every current politician as activity items.

    For each politician with a ``twitter`` screen name on file:

    * the numeric ``twitter_id`` is looked up and stored if missing; a screen
      name that cannot be resolved is logged and removed;
    * the user timeline (without retweets) is fetched; lookup, HTTP and
      connection failures are logged and the politician is skipped;
    * a changed screen name is logged and stored;
    * every tweet dated yesterday or later is saved with guid ``twit_<id>``.
    """
    OLDEST = datetime.date.today() - datetime.timedelta(days=1)
    for pol in Politician.objects.current():
        # Fetch the info mapping once per politician instead of on every lookup.
        info = pol.info()
        if 'twitter' not in info:
            continue
        screen_name = info['twitter']
        if 'twitter_id' in info:
            twitter_id = info['twitter_id']
        else:
            try:
                twitter_id = get_id_from_screen_name(screen_name)
                pol.set_info('twitter_id', twitter_id)
            except ObjectDoesNotExist:
                logger.error('Screen name appears to be invalid: %s', screen_name)
                pol.del_info('twitter')
                continue

        try:
            timeline = twitter_api_request('statuses/user_timeline', {
                'user_id': twitter_id,
                'include_rts': False,
            })
        except ObjectDoesNotExist:
            logger.warning("Invalid twitter ID for %s", pol)
            continue
        except requests.HTTPError as e:
            logger.exception("HTTPError for %s %s", pol, e)
            continue
        except requests.ConnectionError:
            # Still best-effort, but leave a trace instead of skipping silently.
            logger.warning("ConnectionError for %s", pol)
            continue

        if timeline and timeline[0]['user']['screen_name'] != screen_name:
            # Changed screen name
            new_name = timeline[0]['user']['screen_name']
            logger.warning("Screen name change: new %s old %s", new_name, screen_name)
            pol.set_info('twitter', new_name)

        timeline.reverse()
        for tweet in timeline:
            # created_at is parsed with the RFC 2822 date parser, converted to
            # a POSIX timestamp, and reduced to a local calendar date.
            date = datetime.date.fromtimestamp(
                email.utils.mktime_tz(email.utils.parsedate_tz(tweet['created_at']))
            )
            if date < OLDEST:
                continue

            guid = 'twit_%s' % tweet['id']
            # Twitter apparently escapes < > but not & "
            # so unescape lt and gt here; the template re-escapes the text.
            text = tweet['text'].replace('&lt;', '<').replace('&gt;', '>')
            activity.save_activity({'text': text}, politician=pol, date=date, guid=guid, variety='twitter')
def save_sponsor_activity(self):
    """Save a ``billsponsor`` activity item for the sponsoring politician.

    Does nothing when ``sponsor_politician`` is unset. The activity is dated
    one day before ``self.added``.
    """
    sponsor = self.sponsor_politician
    if not sponsor:
        return
    # we generally pick it up the day after it appears
    appeared_on = self.added - datetime.timedelta(days=1)
    activity.save_activity(
        obj=self,
        politician=sponsor,
        date=appeared_on,
        variety='billsponsor',
    )
def save_sponsor_activity(self):
    """Save a ``billsponsor`` activity item for the sponsoring politician.

    Does nothing when ``sponsor_politician`` is unset. The activity is dated
    ``self.introduced`` when that is set, otherwise one day before
    ``self.added``.
    """
    sponsor = self.sponsor_politician
    if not sponsor:
        return
    if self.introduced:
        activity_date = self.introduced
    else:
        activity_date = self.added - datetime.timedelta(days=1)
    activity.save_activity(
        obj=self,
        politician=sponsor,
        date=activity_date,
        variety='billsponsor',
    )
def save_politician_news(pol):
    """Save each news item returned by ``news_items_for_pol(pol)`` as ``gnews`` activity.

    Each item is passed through as the activity payload; its ``'date'`` and
    ``'guid'`` entries supply the activity's date and guid.
    """
    for story in news_items_for_pol(pol):
        story_date = story['date']
        story_guid = story['guid']
        activity.save_activity(story, politician=pol, date=story_date, guid=story_guid, variety='gnews')
def save_sponsor_activity(self):
    """Record the bill's sponsor as a ``billsponsor`` activity entry.

    No-op when the bill has no ``sponsor_politician``.
    """
    if not self.sponsor_politician:
        return
    one_day = datetime.timedelta(days=1)
    activity.save_activity(
        obj=self,
        politician=self.sponsor_politician,
        # we generally pick it up the day after it appears
        date=self.added - one_day,
        variety='billsponsor',
    )
def save_tweets():
    """Fetch each current politician's timeline and save fresh tweets as activity.

    Politicians without a ``twitter`` info entry are skipped. A missing
    ``twitter_id`` is resolved from the screen name and stored; if that
    lookup raises ``ObjectDoesNotExist`` the stored screen name is deleted.
    Timeline fetch failures (``ObjectDoesNotExist``, ``requests.HTTPError``,
    ``requests.ConnectionError``) are logged and the politician is skipped.
    Tweets dated before yesterday are ignored; the rest are saved with
    variety ``twitter`` and guid ``twit_<tweet id>``.
    """
    cutoff = datetime.date.today() - datetime.timedelta(days=1)

    for pol in Politician.objects.current():
        # One info() call per politician; the original re-fetched it for
        # every key access.
        pol_info = pol.info()
        if 'twitter' not in pol_info:
            continue
        stored_name = pol_info['twitter']

        if 'twitter_id' in pol_info:
            twitter_id = pol_info['twitter_id']
        else:
            try:
                twitter_id = get_id_from_screen_name(stored_name)
                pol.set_info('twitter_id', twitter_id)
            except ObjectDoesNotExist:
                logger.error('Screen name appears to be invalid: %s', stored_name)
                pol.del_info('twitter')
                continue

        request_params = {'user_id': twitter_id, 'include_rts': False}
        try:
            timeline = twitter_api_request('statuses/user_timeline', request_params)
        except ObjectDoesNotExist:
            logger.warning("Invalid twitter ID for %s", pol)
            continue
        except requests.HTTPError as e:
            logger.exception("HTTPError for %s %s", pol, e)
            continue
        except requests.ConnectionError:
            # Previously swallowed without a trace; keep skipping, but log it.
            logger.warning("ConnectionError for %s", pol)
            continue

        if timeline:
            current_name = timeline[0]['user']['screen_name']
            if current_name != stored_name:
                # Changed screen name
                logger.warning("Screen name change: new %s old %s", current_name, stored_name)
                pol.set_info('twitter', current_name)

        timeline.reverse()
        for tweet in timeline:
            # Parse created_at as an RFC 2822 style date, then reduce the
            # resulting POSIX timestamp to a local calendar date.
            parsed = email.utils.parsedate_tz(tweet['created_at'])
            date = datetime.date.fromtimestamp(email.utils.mktime_tz(parsed))
            if date < cutoff:
                continue

            # Twitter apparently escapes < > but not & "
            # so unescape lt and gt here; the template re-escapes the text.
            text = tweet['text'].replace('&lt;', '<').replace('&gt;', '>')
            activity.save_activity(
                {'text': text},
                politician=pol,
                date=date,
                guid='twit_%s' % tweet['id'],
                variety='twitter',
            )
def save_activity(self):
    """Save one ``statement`` activity per politician and topic in this hansard.

    For every politician with a statement in this hansard, their
    non-speaker statements are grouped by topic. The longest statement on a
    topic provides the excerpt text and URL saved for that topic.
    """
    speakers = Politician.objects.filter(statement__hansard=self).distinct()
    for pol in speakers:
        # topic -> [sequence of first statement, excerpt text, excerpt URL]
        best_by_topic = {}
        own_statements = self.statement_set.filter(member__politician=pol, speaker=False)
        for stmt in own_statements:
            current = best_by_topic.get(stmt.topic)
            if current is None:
                best_by_topic[stmt.topic] = [
                    stmt.sequence,
                    stmt.text_plain(),
                    stmt.get_absolute_url(),
                ]
                continue
            # Only a strictly longer statement replaces the stored excerpt.
            candidate = stmt.text_plain()
            if len(candidate) > len(current[1]):
                current[1] = candidate
                current[2] = stmt.get_absolute_url()

        for topic, (_sequence, excerpt, url) in best_by_topic.items():
            payload = {
                'topic': topic,
                'url': url,
                'text': excerpt,
            }
            activity.save_activity(
                payload,
                politician=pol,
                date=self.date,
                guid='statement_%s' % url,
                variety='statement',
            )
def save_activity(self):
    """Save activity items for each politician with statements in this document.

    Non-procedural statements are grouped by politician and topic in a single
    pass. Per topic, the longest statement's plain text and URL are kept as
    the excerpt.

    * ``Document.DEBATE``: a ``statement`` activity is saved for every topic.
    * ``Document.EVIDENCE``: a single ``committee`` activity is saved, unless
      the politician's total word count is below 80.
    """
    statements = self.statement_set.filter(procedural=False).select_related("member", "politician")

    # politician -> {topic: [slug of first statement, longest text, its URL]}
    topics_by_pol = {}
    # politician -> total word count across their statements
    words_by_pol = {}
    for statement in statements:
        pol = statement.politician
        if not pol:
            continue
        words_by_pol[pol] = words_by_pol.get(pol, 0) + statement.wordcount
        topics = topics_by_pol.setdefault(pol, {})
        text = statement.text_plain()
        best = topics.get(statement.topic)
        if best is None:
            topics[statement.topic] = [statement.slug, text, statement.get_absolute_url()]
        elif len(text) > len(best[1]):
            # If our statement is longer than the previous statement on this
            # topic, use its text for the excerpt.
            best[1] = text
            best[2] = statement.get_absolute_url()

    for pol, topics in topics_by_pol.items():
        wordcount = words_by_pol[pol]
        # Unpack the entry directly instead of ``topics.values()[0]``, which
        # fails on Python 3 where dict views cannot be indexed.
        for topic, (_slug, text, url) in topics.items():
            if self.document_type == Document.DEBATE:
                activity.save_activity(
                    {"topic": topic, "url": url, "text": text},
                    politician=pol,
                    date=self.date,
                    guid="statement_%s" % url,
                    variety="statement",
                )
            elif self.document_type == Document.EVIDENCE:
                # Invariant: evidence yields exactly one topic per politician.
                assert len(topics) == 1
                if wordcount < 80:
                    continue
                activity.save_activity(
                    {
                        "meeting": self.committeemeeting,
                        "committee": self.committeemeeting.committee,
                        "text": text,
                        "url": url,
                        "wordcount": wordcount,
                    },
                    politician=pol,
                    date=self.date,
                    guid="cmte_%s" % url,
                    variety="committee",
                )
def save_tweets():
    """Save recent statuses from the ``openparlca/mps`` Twitter list as activity.

    Politicians are matched by lower-cased screen name (``twitter`` info
    rows). The client is built from ``settings.TWITTER_USERNAME`` and
    ``settings.TWITTER_PASSWORD``, and 200 statuses per page are requested.
    A status from a screen name with no matching politician is logged and
    skipped rather than aborting the whole run.
    """
    # Local import: the block did not previously depend on logging.
    import logging

    twitter_to_pol = dict(
        (i.value.lower(), i.politician)
        for i in PoliticianInfo.objects.filter(schema='twitter').select_related('politician')
    )
    twit = twitter.Twitter(settings.TWITTER_USERNAME, settings.TWITTER_PASSWORD)
    statuses = twit.openparlca.lists.mps.statuses(per_page=200)
    statuses.reverse()
    for status in statuses:
        screen_name = status['user']['screen_name']
        try:
            pol = twitter_to_pol[screen_name.lower()]
        except KeyError:
            logging.getLogger(__name__).error(
                "Can't find politician for twitter screen name %s", screen_name)
            continue
        # created_at is parsed with the RFC 2822 date parser, converted to a
        # POSIX timestamp, and reduced to a local calendar date.
        date = datetime.date.fromtimestamp(
            email.utils.mktime_tz(email.utils.parsedate_tz(status['created_at']))
        )
        guid = 'twit_%s' % status['id']
        # Twitter apparently escapes < > but not & "
        # so unescape lt and gt here; the template re-escapes the text.
        text = status['text'].replace('&lt;', '<').replace('&gt;', '>')
        activity.save_activity({'text': text}, politician=pol, date=date, guid=guid, variety='twitter')
def save_tweets():
    """Pull the ``mps`` list owned by ``openparlca`` and save each status as activity.

    Authors are mapped to politicians through their numeric Twitter ID
    (``twitter_id`` info rows); unknown IDs are logged and skipped. An author
    whose screen name is not among the stored ``twitter`` values gets that
    value updated. Statuses are saved with variety ``twitter`` and guid
    ``twit_<status id>``.
    """
    id_rows = PoliticianInfo.objects.filter(schema='twitter_id').select_related('politician')
    twitter_to_pol = dict((int(row.value), row.politician) for row in id_rows)
    known_names = set(
        PoliticianInfo.objects.filter(schema='twitter').values_list('value', flat=True))

    twit = twitter.Twitter(auth=twitter.OAuth(**settings.TWITTER_OAUTH),
                           domain='api.twitter.com/1.1')
    statuses = twit.lists.statuses(slug='mps',
                                   owner_screen_name='openparlca',
                                   include_rts=False,
                                   count=200)
    statuses.reverse()

    for status in statuses:
        author = status['user']
        author_id = author['id']
        author_name = author['screen_name']

        try:
            pol = twitter_to_pol[author_id]
        except KeyError:
            logger.error("Can't find twitter ID %s (name %s)", author_id, author_name)
            continue

        if author_name not in known_names:
            # Changed screen name
            pol.set_info('twitter', author_name)
            # Record it so the same author's later statuses skip the update.
            known_names.add(author_name)

        # Parse created_at as an RFC 2822 style date, then reduce the
        # resulting POSIX timestamp to a local calendar date.
        parsed = email.utils.parsedate_tz(status['created_at'])
        date = datetime.date.fromtimestamp(email.utils.mktime_tz(parsed))

        # Twitter apparently escapes < > but not & "
        # so unescape lt and gt here; the template re-escapes the text.
        text = status['text'].replace('&lt;', '<').replace('&gt;', '>')
        activity.save_activity(
            {'text': text},
            politician=pol,
            date=date,
            guid='twit_%s' % status['id'],
            variety='twitter',
        )
def save_tweets():
    """Save statuses from the ``openparlca/mps`` Twitter list as activity items.

    The Twitter client is constructed without arguments. Authors are matched
    to politicians by lower-cased screen name (``twitter`` info rows); a
    status whose author has no match is logged and skipped so one unknown
    account does not abort the run.
    """
    # Local import: the block did not previously depend on logging.
    import logging
    log = logging.getLogger(__name__)

    name_rows = PoliticianInfo.objects.filter(schema='twitter').select_related('politician')
    twitter_to_pol = dict((row.value.lower(), row.politician) for row in name_rows)

    twit = twitter.Twitter()
    statuses = twit.openparlca.lists.mps.statuses(per_page=200)
    statuses.reverse()

    for status in statuses:
        author_name = status['user']['screen_name']
        pol = twitter_to_pol.get(author_name.lower())
        if pol is None:
            log.error("Can't find politician for twitter screen name %s", author_name)
            continue

        # Parse created_at as an RFC 2822 style date, then reduce the
        # resulting POSIX timestamp to a local calendar date.
        parsed = email.utils.parsedate_tz(status['created_at'])
        date = datetime.date.fromtimestamp(email.utils.mktime_tz(parsed))

        # Twitter apparently escapes < > but not & "
        # so unescape lt and gt here; the template re-escapes the text.
        text = status['text'].replace('&lt;', '<').replace('&gt;', '>')
        activity.save_activity(
            {'text': text},
            politician=pol,
            date=date,
            guid='twit_%s' % status['id'],
            variety='twitter',
        )
def save_activity(self):
    """Save this object as an activity item.

    The activity is attributed to ``self.politician`` and dated with the
    date of ``self.votequestion``.
    """
    politician = self.politician
    vote_date = self.votequestion.date
    activity.save_activity(self, politician=politician, date=vote_date)