def gather(database, source, config):
    # url should be like "http(s)://twitter.com/okfn" or simply "okfn"
    username = source.url[source.url.rfind('/') + 1:]
    user = tweepy.api.get_user(username)
    user_id = user.id
    user_realname = user.name
    statuses = tweepy.api.user_timeline(user_id, count=20)
    log.info("%s: %s" % (source.type, username))
    table = database['activity']
    for s in statuses:
        author = s.author.screen_name
        text = s.text
        dt = s.created_at
        url = "http://twitter.com/#!/%s/statuses/%d" % (username, s.id)
        data = {
            'author': user_realname,
            'title': text,
            'source_url': url,
            'description': text
        }
        data = make_activity(data, dt, source)
        table.writerow(data, unique_columns=['author', 'title', 'source_url'])
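
# The gather() above uses tweepy's module-level `tweepy.api` shortcut, which only
# worked while Twitter still accepted unauthenticated API calls. A minimal sketch
# of the authenticated setup it would need instead; the credential strings are
# placeholders, not values from this project:
import tweepy

auth = tweepy.OAuthHandler('CONSUMER_KEY', 'CONSUMER_SECRET')   # hypothetical credentials
auth.set_access_token('ACCESS_TOKEN', 'ACCESS_TOKEN_SECRET')
api = tweepy.API(auth)

# gather() could then call api.get_user(screen_name=username) and
# api.user_timeline(user_id=user.id, count=20) in place of the tweepy.api shortcuts.
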

def gather_pipermail(database, source, how_many_months=1):
    '''Gather mailman archives info.

    :param how_many_months: how many months back to go in the archives.
        Set to <= 0 for unlimited.
    '''
    url = source.url
    log.info(url)
    if 'mailman/listinfo' in url:
        url = url.replace('mailman/listinfo', 'pipermail')
    table = database['activity']
    for message in get_messages(url, how_many_months):
        subjects = message.get_all('Subject')
        subject = subjects[-1] if subjects else '(No Subject)'
        dates = message.get_all('Date')
        if not dates:
            # a message without a Date header cannot be placed on the timeline
            continue
        # strip the timezone offset (e.g. " +0000" / " -0500") before parsing
        date = dates[-1].rsplit(' +', 1)[0].rsplit(' -', 1)[0].strip()
        date = datetime.strptime(date, '%a, %d %b %Y %H:%M:%S')
        # do not save the description here, as it can be large
        # description = message.get_payload()
        description = None
        data = {
            'author': message.get_from().split(' ')[0],
            'title': subject,
            'description': description,
            'source_url': url
        }
        data = make_activity(data, date, source)
        table.writerow(data, unique_columns=['author', 'title', 'datetime'])
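
# get_messages() is defined elsewhere in the project. The methods used above
# (get_all(), get_from()) suggest it yields mailbox.mboxMessage objects parsed
# from pipermail's monthly gzipped mbox archives. A rough, assumed sketch of
# such a helper; the URL layout follows the usual pipermail convention
# ("<list>/YYYY-Month.txt.gz"), and the unlimited-months case is not handled:
import gzip
import mailbox
import tempfile
import urllib.request
from datetime import date


def get_messages(pipermail_url, how_many_months=1):
    """Yield mailbox.mboxMessage objects from the most recent monthly archives."""
    year, month = date.today().year, date.today().month
    for _ in range(max(how_many_months, 1)):
        month_name = date(year, month, 1).strftime('%Y-%B')   # e.g. "2015-January"
        archive_url = '%s/%s.txt.gz' % (pipermail_url.rstrip('/'), month_name)
        try:
            compressed = urllib.request.urlopen(archive_url).read()
        except Exception:
            compressed = None   # that month's archive may simply not exist
        if compressed:
            with tempfile.NamedTemporaryFile(suffix='.mbox') as tmp:
                tmp.write(gzip.decompress(compressed))
                tmp.flush()
                for message in mailbox.mbox(tmp.name):
                    yield message
        month -= 1
        if month == 0:
            year, month = year - 1, 12
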

def gather(database, source):
    feed = feedparser.parse(source.feed_url)
    try:
        log.info("%s: %s" % (source.type, feed.feed.title))
    except AttributeError:
        log.error('Failed to retrieve: %s' % source.feed_url)
    table = database['activity']
    count = 0
    for e in feed.entries:
        count += 1
        try:
            author = e.author_detail.name
        except AttributeError:
            try:
                author = e.author
            except AttributeError:
                author = ''
        try:
            description = e.summary
        except AttributeError:
            try:
                description = e.content[0].value
            except AttributeError:
                description = ''
        # HACKy
        if source.type == 'mediawiki':
            description = description.split('\n')[0]
        date = datetime.fromtimestamp(mktime(e.updated_parsed))
        data = {
            'author': author,
            'title': e.title,
            'source_url': e.link,
            'description': description
        }
        data = make_activity(data, date, source)
        table.writerow(
            data,
            unique_columns=['author', 'title', 'source_url']
        )
    return count
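
# make_activity() is shared by all of the gatherers above but is not shown in
# this file. Judging only from how it is called — make_activity(data, dt, source)
# and the 'datetime' unique column used by the pipermail gatherer — it presumably
# stamps each entry with its timestamp and source metadata before the row is
# written. A hypothetical sketch; the source_* field names are guesses:
def make_activity(data, dt, source):
    """Return `data` augmented with the activity timestamp and its source."""
    data = dict(data)
    data['datetime'] = dt                              # 'datetime' appears in unique_columns above
    data['source_type'] = source.type                  # assumed field
    data['source_id'] = getattr(source, 'id', None)    # assumed field
    return data
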

from datetime import datetime

from requests_futures.sessions import FuturesSession

import config
import common


def unique_place_id():
    place_id = 1
    while True:
        yield place_id
        place_id += 1


if __name__ == '__main__':
    place_id_iter = unique_place_id()
    activities = []
    # Generate activities for each city
    for city in config.ROUTER_NAMES:
        router = common.OtpRouter(city)
        for i in range(0, config.ACTIVITY_NUM_PER_CITY):
            activities.append(common.make_activity(router))
    # max workers set to 10, default is 2
    session = FuturesSession(max_workers=10)
    # headers = {'Authorization': 'Token {}'.format(config.AUTH_TOKEN)}
    session.headers['Authorization'] = 'Token {}'.format(config.AUTH_TOKEN)
    futures = []
    for activity in activities:
        start = datetime.now()
        from_id = next(place_id_iter)
        to_id = next(place_id_iter)
        url = config.LEADGEN_URL + 'activity/{}/{}/'.format(from_id, to_id)
        f = session.put(url, json=activity, background_callback=bg_cb)
        futures.append((f, start))
    # wait for requests to complete
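
# The script above references bg_cb and stops at "wait for requests to
# complete"; neither piece is shown here. An assumed sketch of both follows:
# requests-futures (pre-1.0) calls background callbacks with (session, response)
# in the worker thread, and session.put() returns a standard
# concurrent.futures Future.
from datetime import datetime


def bg_cb(sess, resp):
    # Would need to be defined before the submission loop above.
    # Do response parsing off the main thread, e.g. stash the JSON body.
    resp.data = resp.json() if resp.content else None


def wait_for_futures(futures):
    # Sketch of the missing tail of the script: block on each future in
    # submission order and report how long each round trip took.
    for f, start in futures:
        resp = f.result()
        elapsed = (datetime.now() - start).total_seconds()
        print(resp.status_code, '{:.2f}s'.format(elapsed))
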

import requests

import config
import common


def unique_place_id():
    place_id = 337
    while True:
        yield place_id
        place_id += 1


if __name__ == '__main__':
    place_id_iter = unique_place_id()
    activities = []
    headers = {'Authorization': 'Token {}'.format(config.AUTH_TOKEN)}
    # Generate activities for each city
    for city in config.ROUTER_NAMES:
        router = common.OtpRouter(city)
        for i in range(0, config.ACTIVITY_NUM_PER_CITY):
            from_id = next(place_id_iter)
            to_id = next(place_id_iter)
            url = config.LEADGEN_URL + 'activity/{}/{}/'.format(from_id, to_id)
            activity = common.make_activity(router)
            r = requests.put(url, json=activity, headers=headers)
            print(r.status_code)
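
# Both scripts above import `config` and `common`, which are not part of this
# file; only the constant names they read are known. A hypothetical config.py
# with placeholder values, purely to show the expected shape:

# config.py (illustrative values only, not the project's real settings)
AUTH_TOKEN = 'replace-with-a-real-leadgen-api-token'
LEADGEN_URL = 'https://leadgen.example.org/api/'   # trailing slash matters, given the URL concatenation above
ROUTER_NAMES = ['city-one', 'city-two']            # cities with an OTP router available
ACTIVITY_NUM_PER_CITY = 10                         # activities generated per city
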