def add_story_screenshots(regenerate=False, article_id='storytext'):
    """
    Utility: generate a screenshot of every article.

    Pass regenerate=True to regenerate all screenshots (otherwise stories
    whose screenshot field already has a URL are skipped). Pass article_id
    to specify the CSS ID of the article element; the image is cropped to
    that element.
    """
    if regenerate:
        query = Story.select()
    else:
        # peewee uses == None to build an IS NULL clause
        query = Story.select().where(Story.screenshot == None)

    for story in query:
        logger.info("About to check {0}".format(story.name))
        story.screenshot = screenshotter.get_story_image(story_url=story.url,
                                                         article_id=article_id)
        logger.info("Got screenshot {0}".format(story.screenshot))
        story.save()
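# The Story model is used throughout these snippets but never defined in
# them. A minimal sketch of the fields the code reads and writes, assuming
# peewee (which the Story.select().where(...) query style suggests); the
# field types, nullability, and database name are assumptions:
from peewee import Model, CharField, DateTimeField, SqliteDatabase

db = SqliteDatabase('carebot.db')  # hypothetical database name


class Story(Model):
    name = CharField()
    slug = CharField()                    # may hold several comma-separated slugs
    date = DateTimeField(null=True)
    story_type = CharField(null=True)
    url = CharField(unique=True)          # unique, so re-inserts raise IntegrityError
    image = CharField(null=True)
    screenshot = CharField(null=True)     # URL of the uploaded screenshot
    team = CharField(null=True)
    last_checked = DateTimeField(null=True)
    last_bucket = CharField(null=True)
    tracking_started = DateTimeField(null=True)

    class Meta:
        database = db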
def add_story_screenshots(regenerate=False):
    if regenerate:
        query = Story.select()
    else:
        query = Story.select().where(Story.screenshot == None)

    for story in query:
        logger.info("About to check %s" % story.name)
        story.screenshot = screenshotter.get_story_image(story.url)
        story.save()
def test_change_tracking(self, mock_get_channel_name):
    """
    Check that we can start tracking a URL, then update the slugs that are
    tracked on it
    """
    mock_get_channel_name.return_value = 'default-channel'
    clear_stories()
    tracker = NPRStartTracking()

    class FakeMessage(object):
        body = {
            'text': '@carebot track slug-a-b-c on http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong',
            'channel': 'default-channel'
        }

    expected = "Ok, I've started tracking `slug-a-b-c` on http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong"
    message = tracker.respond(FakeMessage)
    assert expected in message['text']

    # Now try to change the slug
    FakeMessage.body['text'] = '@carebot track slug-a-b-c,slug-x-y-z on http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong'
    message = tracker.respond(FakeMessage)

    results = Story.select()
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0].url, 'http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong')
    self.assertEqual(results[0].slug, 'slug-a-b-c,slug-x-y-z')
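# clear_stories() is a test helper that isn't shown in these snippets. A
# minimal sketch of what the tests assume it does (hypothetical
# implementation, using peewee's bulk delete):
def clear_stories():
    # Remove every Story row so each test starts with an empty table.
    Story.delete().execute()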
def handle_overview_question(message):
    message.reply("Let me check what's been happening. This may take a second.")

    seven_days_ago = datetime.datetime.now() - datetime.timedelta(days=7)
    stories = Story.select().where(Story.tracking_started > seven_days_ago)

    # Some stories have multiple comma-separated slugs; count each slug once.
    slugs = set()  # the built-in set; the deprecated sets.Set isn't needed
    for story in stories:
        print story.name
        for slug in story.slug.split(','):
            slugs.add(slug)

    total_users = analytics.get_user_data(start_date='7daysAgo')
    total_users = int(total_users['rows'][0][0])
    total_users = "{:,}".format(total_users)

    median = analytics.get_linger_rate(start_date='7daysAgo')
    linger_rows = analytics.get_linger_rows(start_date='7daysAgo')
    linger_histogram_url = ChartTools.linger_histogram_link(linger_rows, median)

    attachments = [{
        "fallback": "linger update",
        "color": "#eeeeee",
        "title": "Time spent on graphics over the last week",
        "image_url": linger_histogram_url
    }]

    slackTools.send_message(
        message.body['channel'],
        "In the past 7 days, I've tracked %s stories and %s graphics." % (len(stories), len(slugs)))
    slackTools.send_message(
        message.body['channel'],
        "%s people looked at graphics on those stories. Here's how much time they spent:" % total_users,
        attachments,
        unfurl_links=False)

    fields = []
    for story in stories:
        print "Adding %s" % story.name
        fields.append({
            "title": story.name.strip(),
            "value": '<' + story.url + '|' + story.slug.strip() + '>',
            "short": True
        })

    attachments = [{
        "fallback": "linger update",
        "color": "#eeeeee",
        "fields": fields
    }]

    slackTools.send_message(message.body['channel'], "Here's everything:", attachments, unfurl_links=False)
def get_story_stats():
    analytics = GoogleAnalyticsScraper()

    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s" % story.name)
        story_time_bucket = time_bucket(story.date)
        last_bucket = story.last_bucket

        # Check when the story was last reported on
        if last_bucket:
            # Skip stories that have been checked recently,
            # and stories that are too old.
            if last_bucket == story_time_bucket:
                logger.info("Checked recently. Bucket is still %s" % story_time_bucket)
                continue

        if not story_time_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Some stories have multiple slugs
        stats_per_slug = analytics.get_linger_data_for_story(story)

        if len(stats_per_slug) != 0:  # was `is not 0`, an identity (not value) test
            slackTools.send_linger_time_update(story, stats_per_slug, story_time_bucket)

        # Mark the story as checked
        story.last_checked = datetime.datetime.now(pytz.timezone('US/Eastern'))
        story.last_bucket = story_time_bucket
        story.save()
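# time_bucket() is referenced above but never defined in these snippets. A
# hypothetical sketch of the assumed contract: map a story's age to the most
# recent reporting checkpoint label, or None if the story is too new for any
# checkpoint. Once the last checkpoint is passed the label stops changing,
# which is what makes the `last_bucket == story_time_bucket` test above skip
# old stories. The checkpoint schedule is an assumption, and `date` is
# assumed to be timezone-aware.
def time_bucket(date):
    if not date:
        return None
    age = datetime.datetime.now(pytz.timezone('US/Eastern')) - date
    hours = age.total_seconds() / 3600
    for checkpoint in (36, 24, 12, 8, 4):  # hypothetical schedule, largest first
        if hours >= checkpoint:
            return '%s hours' % checkpoint
    return None  # too new: no checkpoint reached yet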
def write(self, stories, team=None):
    new_stories = []
    for story in stories:
        info_from_api = npr_api_scraper.get_story_details(story['story_url'])
        if not info_from_api:
            logger.info('Not adding %s to database: could not get story' % story['story_headline'])
            continue  # was `pass`, which fell through and crashed on info_from_api below

        exists = Story.select().where(Story.url == story['story_url'])
        if exists:
            logger.info('Not adding %s to database: already exists' % story['story_headline'])
        else:
            try:
                screenshot_url = screenshotter.get_story_image(story['story_url'])
                story = Story.create(
                    name=story['story_headline'].strip(),
                    slug=story['graphic_slug'].strip(),
                    date=info_from_api['date'],
                    story_type=story['graphic_type'].strip(),
                    url=story['story_url'].strip(),
                    image=info_from_api['image'],
                    team=team,
                    screenshot=screenshot_url
                )
                new_stories.append(story)
            except IntegrityError:
                # Story probably already exists.
                logger.info('Not adding %s to database: probably already exists' % story['story_headline'])

    return new_stories
def respond(self, message):
    """
    Respond to requests about the last seven days of data
    TODO: Loop over all stories and report stats on each
    """
    seven_days_ago = datetime.datetime.now() - datetime.timedelta(days=7)
    stories = Story.select().where(Story.tracking_started > seven_days_ago)

    # Some stories have multiple comma-separated slugs; count each slug once.
    slugs = set()  # the built-in set; the deprecated sets.Set isn't needed
    for story in stories:
        for slug in story.slug.split(','):
            slugs.add(slug)

    try:
        team = self.config.get_team_for_story(stories[0])
    except:
        # No tracked stories in the past week (or the lookup failed):
        # fall back to the default team.
        team = self.config.get_default_team()

    total_users = self.get_user_data(team=team, start_date='7daysAgo')
    total_users = int(total_users['rows'][0][0])
    total_users = "{:,}".format(total_users)

    npr_linger = NPRLingerRate()
    linger_rows = npr_linger.get_linger_data(team=team, start_date='7daysAgo')
    median = NPRLingerRate.get_median(linger_rows)
    linger_histogram_url = npr_linger.get_histogram_url(linger_rows, median)

    attachments = [{
        "fallback": "linger update",
        "color": "#eeeeee",
        "title": "Time spent on graphics over the last week",
        "image_url": linger_histogram_url
    }]

    text = "In the past 7 days, I've tracked {0} stories and {1} graphics.".format(len(stories), len(slugs))
    text += "\n\n"
    text += "{0} people looked at graphics on the property. Here's how much time they spent:".format(total_users)

    fields = []
    for story in stories:
        fields.append({
            "title": story.name.strip(),
            "value": "<{0}|{1}>".format(story.url, story.slug.strip()),
            "short": True
        })

    attachments.append({
        "fallback": "linger update",
        "color": "#eeeeee",
        "fields": fields
    })

    return {
        'text': text,
        'attachments': attachments
    }
def test_write_spreadsheet_duplicates(self, mock_upload):
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()
    scraper = SpreadsheetScraper()
    stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')

    # Insert the stories
    scraper.write(stories)
    results = Story.select()
    self.assertEqual(len(results), 4)

    # Now insert them again and make sure we don't have duplicates
    scraper.write(stories)
    results = Story.select()
    self.assertEqual(len(results), 4)
def handle_slug_inquiry(self, message):
    """
    Respond to an inquiry about the slug with stats and charts
    """
    match = re.search(self.SLUG_SEARCH_REGEX, message.body['text'])
    slug = match.group(1)
    if slug:
        # Try to match the story to a slug to accurately get a team.
        # The Google Analytics property ID comes from the team config.
        # We use the default team if none is found.
        stories = Story.select().where(Story.slug.contains(slug))
        team = self.config.get_team_for_stories(stories)

        linger_rows = self.get_linger_data(team=team, slug=slug)
        if not linger_rows:
            return {
                'text': "Sorry, I wasn't able to find linger rate stats for %s" % slug
            }

        median = NPRLingerRate.get_median(linger_rows)
        print "Got median"
        print median
        people = "{:,}".format(median['total_people'])
        time_text = TimeTools.humanist_time_bucket(median)

        reply = u"*%s* people spent a median *%s* on `%s`." % (people, time_text, slug)

        reply += '\n\nThis graphic appears in %s %s I am tracking:' % (
            inflector.number_to_words(len(stories)),
            inflector.plural('story', len(stories)))

        for story in stories:
            reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())

        # Get linger rate data for charting.
        all_graphics_rows = self.get_linger_data(team=team)
        all_graphics_median = NPRLingerRate.get_median(all_graphics_rows)

        attachments = [{
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": slug,
            "image_url": self.get_histogram_url(linger_rows, median)
        }, {
            "fallback": slug + " update",
            "color": "#eeeeee",
            "title": "How all graphics performed",
            "image_url": self.get_histogram_url(all_graphics_rows, all_graphics_median)
        }]

        return {'text': reply, 'attachments': attachments}
def respond(self, message):
    m = re.search(self.START_TRACKING_REGEX, message.body['text'])
    url_match = re.search(self.GRUBER_URLINTEXT_PAT, message.body['text'])

    if not m:
        return False

    slug = m.group(1)
    url = url_match.group(1) if url_match else None  # guard: the message may not contain a URL

    if slug:
        # Check if the slug is in the database.
        try:
            story = Story.select().where(Story.url.contains(url)).get()
            story.slug = slug
            story.save()
            text = "Ok! I'm already tracking `%s`, and I've updated the slug." % url
        except Story.DoesNotExist:
            # If it's not in the database, start tracking it.
            if not url:
                logger.error("Couldn't find story URL in message %s", message.body['text'])
                # Return the error text; a bare `return` here would drop the reply.
                return {'text': "Sorry, I need a story URL to start tracking."}

            details = npr_api_scraper.get_story_details(url)
            if not details:
                logger.error("Couldn't find story in API for URL %s", url)
                return {'text': "Sorry, I wasn't able to find that story in the API, so I couldn't start tracking it."}

            # Find out what team we need to save this story to
            channel = slack_tools.get_channel_name(message.body['channel'])
            team = self.config.get_team_for_channel(channel)

            # Create the story
            story = Story.create(name=details['title'],
                                 slug=slug,
                                 date=details['date'],
                                 url=url,
                                 image=details['image'],
                                 team=team)
            story.save()
            text = "Ok, I've started tracking `%s` on %s. The first stats should arrive in 4 hours or less." % (slug, url)
    else:
        text = "Sorry, I wasn't able to start tracking `%s` right now." % slug

    if text:
        return {'text': text}
def start_tracking(message):
    m = re.search(START_TRACKING_REGEX, message.body['text'])
    if not m:
        return False

    slug = m.group(1)
    if slug:
        # Check if the slug is in the database.
        try:
            story = Story.select().where(Story.slug.contains(slug)).get()
            message.reply("Thanks! I'm already tracking `%s`, and you should start seeing results within a couple hours." % slug)
        except Story.DoesNotExist:
            # If it's not in the database, start tracking it.
            url = re.search(GRUBER_URLINTEXT_PAT, message.body['text'])
            if not url:
                logger.error("Couldn't find story URL in message %s", message.body['text'])
                message.reply("Sorry, I need a story URL to start tracking.")
                return

            details = npr_api_scraper.get_story_details(url.group(1))
            if not details:
                logger.error("Couldn't find story in API for URL %s", url.group(1))
                message.reply("Sorry, I wasn't able to find that story in the API, so I couldn't start tracking it.")
                return

            # Find out what team we need to save this story to
            channel = slackTools.get_channel_name(message.body['channel'])
            team = config.get_team_for_channel(channel)

            # Create the story
            story = Story.create(name=details['title'],
                                 slug=slug,
                                 date=details['date'],
                                 url=url.group(1),
                                 image=details['image'],
                                 team=team)
            story.save()

            message.reply("Ok, I've started tracking `%s`. The first stats should arrive in 4 hours or less." % slug)
    else:
        message.reply("Sorry, I wasn't able to start tracking `%s` right now." % slug)
def handle_pageviews_inquiry(self, message):
    match = re.search(self.UNSUCK_SLUG_SEARCH_REGEX, message.body['text'])
    slug = match.group(1)

    stories = Story.select().where(Story.slug.contains(slug))
    team = config.get_team_for_stories(stories)

    unique_pageviews = self.get_unique_visitor_data(team=team, slug=slug)
    # Check for missing stats before formatting: once the count is turned
    # into a string, `not unique_pageviews` can never be True.
    if not unique_pageviews:
        return {
            'text': "Sorry, I wasn't able to find unique visitor stats for %s" % slug
        }

    unique_pageviews = "{:,}".format(int(unique_pageviews))

    return {
        'text': '`{0}` has had *{1}* unique pageviews'.format(slug, unique_pageviews)
    }
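# UNSUCK_SLUG_SEARCH_REGEX isn't defined in these snippets. A hypothetical
# stand-in for illustration only; the wording is an assumption, and all the
# code above requires is a single capture group that yields the slug:
UNSUCK_SLUG_SEARCH_REGEX = re.compile(r'how many people saw ([\w-]+)', re.IGNORECASE)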
def get_story_stats():
    """
    Loop through every story we know about. If there hasn't been an update
    recently, fetch stats for that article.
    """
    # TODO use a SQL query instead of app logic to exclude stories that are
    # too old.
    for story in Story.select():
        logger.info("About to check %s" % story.name)
        team = config.get_team_for_story(story)
        story_time_bucket = time_bucket(story.date)
        last_bucket = story.last_bucket

        # Check when the story was last reported on
        if last_bucket:
            # Skip stories that have been checked recently,
            # and stories that are too old.
            if last_bucket == story_time_bucket:
                logger.info("Checked recently. Bucket is still %s", story_time_bucket)
                continue

        if not story_time_bucket:
            logger.info("Story is too new; skipping for now")
            continue

        # Each entry in team['plugins'] is a dotted path 'package.module.ClassName';
        # split it into module and class, import the module, and grab the class.
        plugins = [getattr(importlib.import_module(mod), cls)
                   for (mod, cls) in (plugin.rsplit('.', 1) for plugin in team['plugins'])]

        for plugin in plugins:
            plugin = plugin()
            try:
                message = plugin.get_update_message(story)
                if message:
                    slack_tools.send_message(
                        team['channel'],
                        message['text'],
                        message.get('attachments', None)
                    )
            except NotImplementedError:
                pass

        # Mark the story as checked
        story.last_checked = datetime.datetime.now(pytz.timezone(app_config.PROJECT_TIMEZONE))
        story.last_bucket = story_time_bucket
        story.save()
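# The team config consumed above isn't shown in these snippets. A minimal
# sketch of the shape get_story_stats() assumes: the 'channel' and 'plugins'
# keys are grounded in the code above, but the concrete values (and any
# other keys the real config carries) are hypothetical.
team = {
    'channel': 'visuals-graphics',     # Slack channel updates are sent to
    'plugins': [
        'plugins.npr.NPRLingerRate',   # dotted path: module, then class name
    ],
}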
def test_write_spreadsheet(self, mock_upload):
    mock_upload.return_value = 'http://image-url-here'
    clear_stories()
    scraper = SpreadsheetScraper()
    stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    scraper.write(stories)

    results = Story.select()
    self.assertEqual(len(results), 4)

    for idx, story in enumerate(stories):
        self.assertEqual(results[idx].name, story['story_headline'])
        self.assertEqual(results[idx].url, story['story_url'])
def handle_linger_update(message):
    if 'doing' not in message.body['text']:
        return

    m = GRUBER_URLINTEXT_PAT.findall(message.body['text'])
    if not m[0]:
        return

    url = str(m[0][0])
    url = url.replace('&amp;', '&')  # Slack HTML-escapes ampersands in URLs
    logger.info("Looking for url %s" % url)

    try:
        story = Story.select().where(Story.url == url).get()
    except:
        message.reply("Sorry, I don't have stats for %s" % url)
        return

    story_time_bucket = story.time_bucket()
    stats_per_slug = analytics.get_linger_data_for_story(story)

    if len(stats_per_slug) != 0:  # was `is not 0`, an identity (not value) test
        reply = "Here's what I know about the graphics on _%s_:" % story.name.strip()

        fields = []
        for stat in stats_per_slug:
            time = TimeTools.humanist_time_bucket(stat['stats'])
            fields.append({
                "title": stat['slug'],
                "value": time,
                "short": True
            })

        attachments = [{
            "fallback": story.name + " update",
            "color": "#eeeeee",
            "title": story.name,
            "title_link": story.url,
            "fields": fields
        }]

        # Use send_message instead of message.reply, otherwise we lose
        # the bot icon.
        slackTools.send_message(message.body['channel'], reply, attachments)
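# TimeTools.humanist_time_bucket() is called above but never defined in these
# snippets. A sketch of the assumed behavior: turn a stats dict into text
# like "1 minute 30 seconds". The 'minutes' and 'seconds' keys are
# assumptions about the dict's shape.
class TimeTools(object):
    @staticmethod
    def humanist_time_bucket(linger):
        time = ''
        if linger['minutes'] > 0:
            time += str(linger['minutes'])
            time += ' minute' if linger['minutes'] == 1 else ' minutes'
        if linger['seconds'] > 0:
            if time:
                time += ' '
            time += str(linger['seconds'])
            time += ' second' if linger['seconds'] == 1 else ' seconds'
        return time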
def test_write_spreadsheet(self, mock_upload):
    """
    Make sure we save the stories to the database when scraping from a
    spreadsheet
    """
    clear_stories()
    scraper = SpreadsheetScraper(self.source)
    stories = scraper.scrape_spreadsheet('tests/data/stories.xlsx')
    scraper.write(stories)

    results = Story.select()
    self.assertEqual(len(results), 4)

    for idx, story in enumerate(stories):
        self.assertEqual(results[idx].name, story['story_headline'])
        self.assertEqual(results[idx].url, story['story_url'])
def handle_scroll_slug_question(message):
    m = re.search(SCROLL_RATE_REGEX, message.body['text'])
    if not m:
        return

    slug = m.group(1)
    if slug:
        stories = Story.select().where(Story.slug.contains(slug))
        rows = analytics.get_depth_rate(slug)

        if rows:
            reply = u"Here's what I know about `%s`." % slug
            reply += '\n\nThis graphic appears in %s %s:' % (
                inflector.number_to_words(len(stories)),
                inflector.plural('story', len(stories)))

            for story in stories:
                reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())

            histogram_url = ChartTools.scroll_histogram_link(rows)

            # Note: `story` here is the last story from the loop above.
            if story.screenshot:
                histogram_url = ChartTools.add_screenshot_to_chart(story.screenshot, histogram_url)

            attachments = [{
                "fallback": slug + " update",
                "color": "#eeeeee",
                "title": slug,
                "image_url": histogram_url
            }]

            slackTools.send_message(message.body['channel'], reply, attachments, unfurl_links=False)
        else:
            message.reply("I wasn't able to find scroll data for %s" % slug)
def handle_url_inquiry(self, message):
    """
    Respond to "How is http://example.com/foo doing?"
    """
    if 'doing' not in message.body['text']:
        return

    match = self.GRUBER_URLINTEXT_PAT.findall(message.body['text'])
    if not match[0]:
        return

    url = str(match[0][0])
    url = url.replace('&amp;', '&')  # Slack HTML-escapes ampersands in URLs
    logger.info("Looking for url %s" % url)

    try:
        story = Story.select().where(Story.url == url).get()
    except:
        return {'text': "Sorry, I don't have stats for %s" % url}

    return self.get_update_message(story)
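# GRUBER_URLINTEXT_PAT refers to John Gruber's "liberal, accurate" regex for
# matching URLs in text. The real pattern is long; this simplified stand-in
# (an assumption, not the original) preserves the shape the code above
# relies on: group 1 of a search() and element [0][0] of a findall() are
# both the full URL.
GRUBER_URLINTEXT_PAT = re.compile(r'((https?)://[^\s<>"]+)')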
def get_slug_message(self, slug, story=None):
    # Try to match the story to a slug to accurately get a team.
    # The Google Analytics property ID comes from the team config.
    # We use the default team if none is found.
    stories = Story.select().where(Story.slug.contains(slug))
    team = self.config.get_team_for_stories(stories)

    params = self.get_slug_query_params(team=team, slug=slug)
    data = GoogleAnalytics.query_ga(params)
    if not data.get('rows'):
        logger.info('No rows found for slug %s' % slug)
        return

    # Clean up the data
    clean_data = self.clean_data(data.get('rows'))
    total_people = self.get_total_people(clean_data)
    friendly_people = "{:,}".format(total_people)  # Comma-separated #s
    median = self.get_median(clean_data)

    # Set up the chart
    scroll_histogram_url = self.get_chart(clean_data)
    if story:
        scroll_histogram_url = ChartTools.add_screenshot_to_chart(story, scroll_histogram_url)

    # TODO: Not confident in median calculations so far
    # text = "*%s people* got a median of *%s percent* down the page." % (friendly_people, median)
    text = ''

    attachments = [{
        "fallback": slug + " update",
        "color": "#eeeeee",
        "title": "How far down did people scroll?",
        "image_url": scroll_histogram_url
    }]

    return {
        'text': text,
        'attachments': attachments
    }
def test_start_tracking(self, mock_get_channel_name):
    """
    Test if we can start tracking a new story given only an NPR URL and a
    graphic slug
    """
    mock_get_channel_name.return_value = 'default-channel'
    clear_stories()
    tracker = NPRStartTracking()

    class FakeMessage(object):
        body = {
            'text': '@carebot track slug-a-b-c on http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong',
            'channel': 'default-channel'
        }

    expected = "Ok, I've started tracking `slug-a-b-c` on http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong"
    message = tracker.respond(FakeMessage)
    print message
    assert expected in message['text']

    results = Story.select()
    self.assertEqual(len(results), 1)
    self.assertEqual(results[0].url, 'http://www.npr.org/sections/13.7/2016/02/16/466109612/was-einstein-wrong')
def handle_slug_question(message):
    m = re.search(LINGER_RATE_REGEX, message.body['text'])
    if not m:
        return

    slug = m.group(1)
    if slug:
        median = analytics.get_linger_rate(slug)
        stories = Story.select().where(Story.slug.contains(slug))
        message.reply("Ok! I'm looking up %s. This may take a second." % slug)

        if median:
            people = "{:,}".format(median['total_people'])
            time_text = TimeTools.humanist_time_bucket(median)

            reply = u"*%s* people spent a median *%s* on `%s`." % (people, time_text, slug)

            # List the stories this slug appears on
            reply += '\n\nThis graphic appears in %s %s:' % (
                inflector.number_to_words(len(stories)),
                inflector.plural('story', len(stories)))

            for story in stories:
                reply += '\n' + '*<%s|%s>*' % (story.url, story.name.strip())

            # Get linger rate data
            linger_rows = analytics.get_linger_rows(slug)
            linger_histogram_url = ChartTools.linger_histogram_link(linger_rows, median)

            all_graphics_rows = analytics.get_linger_rows()
            all_graphics_median = analytics.get_linger_rate()
            all_histogram = ChartTools.linger_histogram_link(all_graphics_rows, all_graphics_median)

            attachments = [{
                "fallback": slug + " update",
                "color": "#eeeeee",
                "title": slug,
                "image_url": linger_histogram_url
            }, {
                "fallback": slug + " update",
                "color": "#eeeeee",
                "title": "How all graphics performed",
                "image_url": all_histogram
            }]

            # Get scroll data, if any.
            scroll_depth_rows = analytics.get_depth_rate(slug)
            if scroll_depth_rows:
                scroll_histogram_url = ChartTools.scroll_histogram_link(scroll_depth_rows)

                if stories[0].screenshot:
                    scroll_histogram_url = ChartTools.add_screenshot_to_chart(stories[0].screenshot, scroll_histogram_url)

                attachments.append({
                    "fallback": slug + " update",
                    "color": "#eeeeee",
                    "title": "How far down did people scroll?",
                    "image_url": scroll_histogram_url
                })

            slackTools.send_message(message.body['channel'], reply, attachments, unfurl_links=False)
        else:
            message.reply("I wasn't able to figure out the linger rate of %s" % slug)