def update_github_prs():
    def fetch_and_process(url):
        logging.debug("Following url %s" % url)
        response = raw_github_request(
            url, oauth_token=app.config['GITHUB_OAUTH_KEY'])
        link_header = parse_link_header(response.headers.get('Link', ''))
        prs = json.loads(response.content)
        now = datetime.utcnow()
        for pr in prs:
            updated_at = \
                parse_datetime(pr['updated_at']).astimezone(tz.tzutc()).replace(tzinfo=None)
            is_fresh = (now - updated_at).total_seconds() < app.config['FRESHNESS_THRESHOLD']
            queue_name = ("fresh-prs" if is_fresh else "old-prs")
            taskqueue.add(url="/tasks/update-github-pr/%i" % pr['number'],
                          queue_name=queue_name)
        for link in link_header.links:
            if link.rel == 'next':
                fetch_and_process(link.href)
    last_update_time = KVS.get("issues_since")
    url = ISSUES_BASE + "?sort=updated&state=all&per_page=100"
    if last_update_time:
        url += "&since=%s" % last_update_time
    fetch_and_process(url)
    # Use UTC so the stored timestamp matches its "Z" suffix:
    KVS.put('issues_since', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
    return "Done fetching updated GitHub issues"
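parse_link_header is presumably a library or project helper rather than anything shown here. Purely for illustration, a minimal stand-in that supports the link_header.links / link.rel / link.href access pattern used above might look like the following sketch (the regex and namedtuple shapes are assumptions, not the real helper):

import re
from collections import namedtuple

Link = namedtuple('Link', ['href', 'rel'])
LinkHeader = namedtuple('LinkHeader', ['links'])

def parse_link_header(header_value):
    # GitHub paginates via headers of the form:
    #   Link: <https://api.github.com/...&page=2>; rel="next",
    #         <https://api.github.com/...&page=7>; rel="last"
    links = []
    for part in header_value.split(','):
        match = re.search(r'<([^>]+)>;\s*rel="([^"]+)"', part)
        if match:
            links.append(Link(href=match.group(1), rel=match.group(2)))
    return LinkHeader(links=links)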
def update_jira_issues():
    feed_url = "%s/activity?maxResults=20&streams=key+IS+%s&providers=issues" % \
        (app.config['JIRA_API_BASE'], app.config['JIRA_PROJECT'])
    feed = feedparser.parse(feed_url)
    # To avoid double-processing RSS feed entries, only process entries that are newer
    # than the watermark set during the last refresh:
    last_watermark = KVS.get("jira_sync_watermark")
    if last_watermark is not None:
        new_entries = [i for i in feed.entries if i.published_parsed > last_watermark]
    else:
        new_entries = feed.entries
    if not new_entries:
        return "No new entries to update since last watermark " + str(last_watermark)
    issue_ids = set(i.link.split('/')[-1] for i in new_entries)
    for issue in issue_ids:
        taskqueue.add(url="/tasks/update-jira-issue/" + issue,
                      queue_name='jira-issues')
    # The activity stream is newest-first, so the first entry carries the new watermark:
    KVS.put('jira_sync_watermark', new_entries[0].published_parsed)
    return "Queued JIRA issues for update: " + str(issue_ids)
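The watermark comparison works because feedparser exposes published_parsed as a time.struct_time, which compares field by field (year, month, day, ...), i.e. in chronological order. A small self-contained demonstration, with invented timestamps:

import time

older = time.strptime("2015-03-01T10:00:00Z", "%Y-%m-%dT%H:%M:%SZ")
newer = time.strptime("2015-03-02T09:30:00Z", "%Y-%m-%dT%H:%M:%SZ")

# struct_time compares like a tuple, which matches chronological order:
assert newer > older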
def update_issues():
    def fetch_and_process(url):
        logging.debug("Following url %s" % url)
        response = raw_request(url, oauth_token=app.config['GITHUB_OAUTH_KEY'])
        links = parse_link_value(response.headers.get('Link', ''))
        prs = json.loads(response.content)
        for pr in prs:
            taskqueue.add(url="/tasks/update-issue/%i" % pr['number'])
        for (link_url, info) in links.items():
            if info.get('rel') == 'next':
                fetch_and_process(link_url)
    last_update_time = KVS.get("issues_since")
    url = ISSUES_BASE + "?sort=updated&state=all&per_page=100"
    if last_update_time:
        url += "&since=%s" % last_update_time
    fetch_and_process(url)
    # Use UTC so the stored timestamp matches its "Z" suffix:
    KVS.put('issues_since', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
    return "Done fetching updated GitHub issues"
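KVS here is the app's persistent key-value store, used only through get and put. For local experimentation, a minimal in-memory stand-in with the same interface could look like this (hypothetical; real deployments persist values across requests):

class InMemoryKVS(object):
    # Stand-in for the app's datastore-backed key-value store.
    def __init__(self):
        self._data = {}

    def get(self, key):
        return self._data.get(key)

    def put(self, key, value):
        self._data[key] = value

KVS = InMemoryKVS()
assert KVS.get("issues_since") is None
KVS.put("issues_since", "2015-03-09T00:00:00Z")
assert KVS.get("issues_since") == "2015-03-09T00:00:00Z"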
def update_github_prs():
    last_update_time = KVS.get("issues_since")
    if last_update_time:
        last_update_time = \
            parse_datetime(last_update_time).astimezone(tz.tzutc()).replace(tzinfo=None)
    else:
        # If no update has ever run successfully, store "now" as the watermark. If this
        # update task fails (because there are too many old PRs to load / backfill) then
        # there's a chance that this initial timestamp won't be the true watermark. If we
        # are trying to bulk-load old data then this should be done by calling
        # /github/backfill-prs instead.
        last_update_time = datetime.min
        KVS.put('issues_since', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))

    def fetch_and_process(url):
        logging.debug("Following url %s" % url)
        response = raw_github_request(url, oauth_token=app.config['GITHUB_OAUTH_KEY'])
        prs = json.loads(response.content)
        now = datetime.utcnow()
        should_continue_loading = True
        update_time = last_update_time
        for pr in prs:
            updated_at = \
                parse_datetime(pr['updated_at']).astimezone(tz.tzutc()).replace(tzinfo=None)
            update_time = max(update_time, updated_at)
            # Results are sorted by update time, descending, so stop paging as soon as
            # we see a PR that predates the last successful sync:
            if updated_at < last_update_time:
                should_continue_loading = False
                break
            is_fresh = (now - updated_at).total_seconds() < app.config['FRESHNESS_THRESHOLD']
            queue_name = ("fresh-prs" if is_fresh else "old-prs")
            taskqueue.add(url=url_for(".update_pr", pr_number=pr['number']),
                          queue_name=queue_name)
        if should_continue_loading:
            link_header = parse_link_header(response.headers.get('Link', ''))
            for link in link_header.links:
                if link.rel == 'next':
                    fetch_and_process(link.href)
        return update_time

    update_time = \
        fetch_and_process(get_pulls_base() + "?sort=updated&state=all&direction=desc&per_page=100")
    KVS.put('issues_since', update_time.strftime("%Y-%m-%dT%H:%M:%SZ"))
    return "Done fetching updated GitHub issues"
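Because the request asks GitHub for PRs sorted by update time in descending order, the loop can stop at the first PR older than the watermark: everything after it on this page, and on every later page, must be older still. A minimal sketch of that invariant, using plain datetimes in place of API results (names and values invented for illustration):

from datetime import datetime

def take_updated_since(update_times_desc, watermark):
    # update_times_desc must be sorted newest-first, mirroring
    # "?sort=updated&direction=desc" in the GitHub request above.
    fresh = []
    for t in update_times_desc:
        if t < watermark:
            break  # everything that follows is older; stop paging
        fresh.append(t)
    return fresh

times = [datetime(2015, 3, d) for d in (9, 7, 4, 2)]
assert take_updated_since(times, datetime(2015, 3, 3)) == times[:3]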
def update_issues():
    def fetch_and_process(url):
        logging.debug("Following url %s" % url)
        response = raw_github_request(url, oauth_token=app.config['GITHUB_OAUTH_KEY'])
        link_header = parse_link_header(response.headers.get('Link', ''))
        prs = json.loads(response.content)
        now = datetime.utcnow()
        for pr in prs:
            updated_at = \
                parse_datetime(pr['updated_at']).astimezone(tz.tzutc()).replace(tzinfo=None)
            is_fresh = (now - updated_at).total_seconds() < app.config['FRESHNESS_THRESHOLD']
            queue_name = ("fresh-prs" if is_fresh else "old-prs")
            taskqueue.add(url="/tasks/update-issue/%i" % pr['number'],
                          queue_name=queue_name)
        for link in link_header.links:
            if link.rel == 'next':
                fetch_and_process(link.href)
    last_update_time = KVS.get("issues_since")
    url = ISSUES_BASE + "?sort=updated&state=all&per_page=100"
    if last_update_time:
        url += "&since=%s" % last_update_time
    fetch_and_process(url)
    # Use UTC so the stored timestamp matches its "Z" suffix:
    KVS.put('issues_since', datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"))
    return "Done fetching updated GitHub issues"
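The fresh/old split keeps a burst of stale items from starving updates to recently-active ones: each item is routed to a queue based on how recently it was updated. A self-contained sketch of that routing decision (the threshold value is invented for illustration; the real one comes from app.config['FRESHNESS_THRESHOLD']):

from datetime import datetime, timedelta

FRESHNESS_THRESHOLD = 30 * 24 * 60 * 60  # seconds; example value only

def queue_for(updated_at, now=None):
    # Items updated within the threshold go to the high-priority queue.
    now = now or datetime.utcnow()
    age_seconds = (now - updated_at).total_seconds()
    return "fresh-prs" if age_seconds < FRESHNESS_THRESHOLD else "old-prs"

now = datetime(2015, 3, 9)
assert queue_for(now - timedelta(days=1), now) == "fresh-prs"
assert queue_for(now - timedelta(days=90), now) == "old-prs"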