def update(self, all=False, **args):
    """Import a user's GitHub repository events and commits into ``db.stream``.

    Repositories come from ``self.get_repos`` and commits from
    ``self.get_commits``.  One stream entry is saved for every repository
    event and every commit whose ``sourceid`` is not stored yet; items that
    are already present are skipped.

    Args:
        all: Passed through unchanged to ``get_repos`` and ``get_commits``.
        **args: Must contain ``"username"``, the GitHub account to import.

    Raises:
        KeyError: If ``"username"`` is missing from ``args``.
    """
    from github import Github, to_datetime
    from argot import utils

    api_prefix = "https://api.github.com/repos"

    username = args["username"]
    self.handle = Github(username=username)
    repos = self.get_repos(username, all=all)
    commits = self.get_commits(username, repos, all=all)

    for repo in repos:
        sourceid = str(repo["id"])
        if db.stream.find({"type": "github", "sourceid": sourceid}).count():
            continue  # already imported

        # NOTE(review): time.mktime() interprets the time tuple as local
        # time -- confirm that is what to_datetime() produces.
        timestamp = int(time.mktime(to_datetime(repo["created_at"]).timetuple()))
        # NOTE(review): the title appends "ed" to the raw event name, so an
        # event called "create" would read "createed" -- confirm the values.
        entry = {
            "sourceid": sourceid,
            "type": "github",
            "timestamp": timestamp,
            "title": "%s %sed @ %s" % (repo["name"], repo["event"], timestamp),
            "url": repo["url"],
            "data": json.dumps({"event": repo}),
        }
        db.stream.save(entry)

    for commit in commits:
        sourceid = str(commit["sha"])
        # NOTE(review): unlike the repository lookup above, this duplicate
        # check does not filter on "type" -- confirm that is intended.
        if db.stream.find({"sourceid": sourceid}).count():
            continue  # already imported

        entry = {"type": "github", "sourceid": sourceid}
        timestamp = int(time.mktime(
            to_datetime(commit["commit"]["author"]["date"]).timetuple()))

        # This might block for some time: one extra request per new commit.
        details = self.handle.repository(
            username, commit['repository']['name']).commit(commit["sha"])
        # Merge the detailed record into the commit; values read from
        # `commit` below therefore reflect the merged data.
        commit.update(details)

        for mod in commit.get('modified', []):
            mod['htmldiff'] = utils.pygmentize(mod['diff'], 'diff', cssclass="diff")

        entry["timestamp"] = timestamp
        entry["title"] = "committed %s to %s" % (
            commit["sha"], commit['repository']['name'])

        if "message" not in commit:
            commit["message"] = commit["commit"]["message"]

        url = commit['url']
        if url.startswith(api_prefix):
            # Reduce API URLs to a site-relative path; the stripped value is
            # also written back so it ends up in the serialised event data.
            url = url.replace(api_prefix, "")
            commit["url"] = url
        if not url.startswith(("http://", "https://")):
            # Only relative paths get the site prefix.  Prefixing a URL that
            # is already absolute would yield "https://github.comhttps://...".
            url = "https://github.com%s" % url
        entry["url"] = url.replace("/commits/", "/commit/")

        entry["data"] = json.dumps({'event': commit})
        db.stream.save(entry)
def github_fix_1():
    """Repair previously stored ``github`` entries in ``db.stream``.

    Runs a sequence of passes over the stored entries:

    1. URL repairs: doubled host prefixes, a doubled ``://``, API URLs and
       ``/commits/`` paths.
    2. Copies the nested commit message to the top-level ``"message"`` key
       of the event data when it is missing.
    3. Recomputes every entry's timestamp from its event data and back-fills
       a missing ``"committed_date"`` on commit events.

    Prints how many saves were performed.  Passes 2 and 3 save (and count)
    every github entry they visit, whether or not anything changed, and an
    entry touched by several passes is counted once per pass.
    """
    import github

    fixed = 0

    # --- fix urls ---------------------------------------------------------
    double_urls = db.stream.find({
        "type": "github",
        "url": {"$regex": "https://github.comhttp.*", "$options": "i"},
    })
    for entry in double_urls:
        # Leaves "https://://..." behind, which the next pass cleans up.
        # NOTE(review): only the exact substring "github.comhttps" is
        # removed; other URLs matched by the regex are saved unchanged.
        entry["url"] = entry["url"].replace("github.comhttps", "")
        db.stream.save(entry)
        fixed += 1

    double_semi_urls = db.stream.find({
        "type": "github",
        "url": {"$regex": "https://://.*"},
    })
    for entry in double_semi_urls:
        entry["url"] = entry["url"].replace("://://", "://")
        db.stream.save(entry)
        fixed += 1

    api_urls = db.stream.find({
        "type": "github",
        "url": {"$regex": "https://api.github.*", "$options": "i"},
    })
    for entry in api_urls:
        # NOTE(review): rewrites to an http:// URL -- confirm the scheme.
        entry["url"] = "http://github.com%s" % (
            entry["url"].replace("https://api.github.com/repos", ""))
        db.stream.save(entry)
        fixed += 1

    commits_urls = db.stream.find({
        "type": "github",
        "url": {"$regex": ".*/commits/.*"},
    })
    for entry in commits_urls:
        entry["url"] = entry["url"].replace("/commits/", "/commit/")
        db.stream.save(entry)
        fixed += 1

    # --- fix messages that are commits but do not have a message in the
    # --- commit data -------------------------------------------------------
    entries = db.stream.find({"type": "github"})
    for entry in entries:
        data = json.loads(entry["data"])
        commit = data["event"]
        if "message" not in commit and "commit" in commit:
            commit["message"] = commit["commit"]["message"]
        entry["data"] = json.dumps({"event": commit})
        db.stream.save(entry)
        fixed += 1

    # --- fix timestamps on creates and forks (and on commits) --------------
    entries = db.stream.find({"type": "github"})
    for entry in entries:
        data = json.loads(entry["data"])
        event = data["event"]
        if event["event"] != "commit":
            entry["timestamp"] = int(time.mktime(
                github.to_datetime(event["created_at"]).timetuple()))
            db.stream.save(entry)
            fixed += 1
        else:
            if "committed_date" in event:
                ts = event["committed_date"]
            else:
                # fix the absence of "committed_date" key in older events
                ts = event["commit"]["author"]["date"]
                event["committed_date"] = ts
                entry["data"] = json.dumps({"event": event})
            # Store an integer, matching the non-commit branch above;
            # time.mktime() on its own returns a float.
            entry["timestamp"] = int(time.mktime(
                github.to_datetime(ts).timetuple()))
            db.stream.save(entry)
            fixed += 1

    # Single-argument print(...) produces the same output whether print is
    # a statement or a function.
    if fixed == 1:
        print("Fixed 1 entry")
    else:
        print("Fixed %d entries." % fixed)