def __getitem__(self, key):
    """Fetch article *key* from the gmane archive as an email message.

    The article body lines are joined into a raw message, parsed, stamped
    with a parsed ``date`` attribute, and the sender is recorded.
    """
    logger.debug("gmane: Getting article %d for archive %s" % (key, self.group))
    raw_lines = self.gmane.article(str(key))[-1]
    mail = email.message_from_string("\n".join(raw_lines))
    # Convert the RFC 2822 Date header into a local datetime.
    timestamp = mktime_tz(parsedate_tz(mail['date']))
    mail.date = datetime.datetime.fromtimestamp(timestamp)
    self.add_sender(mail)
    return mail
def _get_location(self): if not self._location: while self._location_func: lf = self._location_func.pop() location = lf() if location and location[1] > self.location_quality: logger.debug("users: Location %s found to replace %s for %s" % (location, self._location, self)) self._location = location return self._location
def get_user_by_name(self, name):
    """Find a user by exact username first, then by fuzzy name match.

    Raises IndexError when the fuzzy match finds no candidate at all.
    """
    # Exact username match wins outright.
    for candidate in self.users:
        if name == candidate.username:
            logger.debug("users: Found user %s when searching for %s by exact username match" % (candidate.name, name))
            return candidate
    # Fall back to a fuzzy comparison on the display name.
    user = [u for u in self.users if name_cmp(u.name, name)][0]
    logger.debug("users: Found user %s when searching for %s" % (user.name, name))
    return user
def __getitem__(self, key):
    """Return article *key* from this gmane group as an email.Message.

    The parsed Date header is attached as a datetime on ``mail.date``
    and the sender is recorded via ``add_sender``.
    """
    logger.debug("gmane: Getting article %d for archive %s" % (key, self.group))
    article = self.gmane.article(str(key))
    message = "\n".join(article[-1])
    mail = email.message_from_string(message)
    date_tuple = parsedate_tz(mail['date'])
    mail.date = datetime.datetime.fromtimestamp(mktime_tz(date_tuple))
    self.add_sender(mail)
    return mail
def commits_since(self, date):
    """Return commits in this repo newer than *date*.

    When there are more than five, the commits are collapsed into one
    combined commit per author to keep the result manageable.
    """
    recent = {c for c in self.commits() if c.date > date}
    if len(recent) <= 5:
        return recent
    # groupby needs its input sorted by the same key.
    by_author = attrgetter("author")
    combined = set()
    for author, group in itertools.groupby(sorted(recent, key=by_author), key=by_author):
        logger.debug("git: Combined commits by %s in %s" % (author, self.package))
        combined.add(reduce(Commit.__add__, group))
    return combined
def _get_location(self): if not self._location: while self._location_func: lf = self._location_func.pop() location = lf() if location and location[1] > self.location_quality: logger.debug( "users: Location %s found to replace %s for %s" % (location, self._location, self)) self._location = location return self._location
def get_user_by_name(self, name):
    """Look *name* up among known users.

    An exact username match is preferred; otherwise the first fuzzy
    display-name match is returned (IndexError when none matches).
    """
    exact = next((u for u in self.users if name == u.username), None)
    if exact is not None:
        logger.debug(
            "users: Found user %s when searching for %s by exact username match" % (exact.name, name))
        return exact
    user = [u for u in self.users if name_cmp(u.name, name)][0]
    logger.debug("users: Found user %s when searching for %s" % (user.name, name))
    return user
def _get_github_accounts(self, accounts, token):
    """Resolve GitHub login names to User objects and register them.

    :param accounts: iterable of GitHub login names.
    :param token: GitHub API OAuth token (passed as access_token).

    Logins already in the module-level ALREADY_FOUND cache are skipped
    so repeated runs do not re-hit the API.
    """
    users = getUtility(IUserDatabase)
    # NOTE: a float counter `i` was incremented here but never read;
    # removed as dead code.
    for account in accounts:
        if account in ALREADY_FOUND:
            continue
        url = "https://api.github.com/users/%s?access_token=%s" % (account,token)
        logger.debug("github: getting %s" % url)
        gh_api = urllib2.urlopen(url)
        user = json.loads(gh_api.read())
        ALREADY_FOUND.add(account)
        # The API returns "name": null for users who never set one; in
        # that case dict.get('name', default) still yields None, so fall
        # back to the login explicitly.
        display_name = user.get('name') or user['login']
        users.add_user(User(display_name,
                            user.get('email', None),
                            username=user['login'],
                            location_func=lazy_location(user)))
def get_data(self, token, checkout_directory):
    """Collect recent issues and per-repo git data for the organisation.

    :param token: GitHub API OAuth token (also stored on self.token).
    :param checkout_directory: directory for repo checkouts; the literal
        string "temp" means let GitRepo choose its own location.
    :return: set of Issue objects plus whatever each GitRepo contributes.
    """
    self.token = token
    data = set()
    if checkout_directory == "temp":
        # None tells GitRepo to pick its own location.
        checkout_directory = None
    five_days_ago = (datetime.datetime.now()
                     - datetime.timedelta(days=5)).date().isoformat()
    # --- issues, following Link-header pagination ---
    url = "https://api.github.com/orgs/%s/issues?access_token=%s&filter=all&since=%s" % (self.org, self.token, five_days_ago)
    while True:
        logger.debug("github: getting %s" % url)
        issues_resp = urllib2.urlopen(url)
        for issue in json.loads(issues_resp.read()):
            data.add(Issue(issue))
        pages = LINKS.findall(issues_resp.headers.get('Link', ''))
        pages = {rel: target for target, rel in pages}
        if 'next' not in pages:
            logger.info("github: Got all issues for %s" % (self.org))
            break
        logger.debug("github: %s has too many issues, requesting more" % (self.org))
        url = pages['next']
    # --- public repos, same pagination scheme ---
    url = "https://api.github.com/orgs/%s/repos?access_token=%s&type=public" % (self.org, self.token)
    self.repos = {}
    while True:
        logger.debug("github: getting %s" % url)
        repos_resp = urllib2.urlopen(url)
        for repo in json.loads(repos_resp.read()):
            repo_name = repo['name']
            if checkout_directory:
                location = os.path.join(checkout_directory, repo_name)
            else:
                location = None
            logger.info("github: Getting changes for %s/%s" % (self.org, repo_name))
            self.repos[repo_name] = GitRepo(repo['git_url'], location=location)
        pages = LINKS.findall(repos_resp.headers.get('Link', ''))
        pages = {rel: target for target, rel in pages}
        if 'next' not in pages:
            logger.info("github: Got all repos for %s" % (self.org))
            break
        logger.debug("github: %s has too many repos, requesting more" % (self.org))
        url = pages['next']
    logger.debug("github: Got data for %d repos in %s" % (len(self.repos), self.org))
    for repo in self.repos.values():
        data |= repo.get_data()
    return data
def get_questions_since(self, date, method='activity'):
    """Fetch StackOverflow questions tagged with self.tags since *date*.

    :param date: datetime lower bound (converted to a UTC timestamp).
    :param method: API sort order, default 'activity'.
    :return: Question objects built from the API's 'items'.
    """
    ts_from = calendar.timegm(date.utctimetuple())
    ts_to = calendar.timegm(datetime.datetime.now().utctimetuple())
    url = (BASE
           + "&sort=%s" % (method)
           + "&min=%d&max=%d" % (ts_from, ts_to)
           + "&tagged=%s" % (";".join(self.tags)))
    logger.debug("stackoverflow: getting %s" % url)
    compressed = urllib2.urlopen(url).read()
    # The API always gzips its responses.
    payload = gzip.GzipFile(fileobj=StringIO(compressed)).read()
    questions = json.loads(payload)['items']
    logger.info("stackoverflow: Getting questions for tags: %s" % (",".join(self.tags)))
    questions = map(Question, questions)
    return questions
def email_location():
    """Best-effort location guess from the message's Original-Received
    headers: take the last public IP seen and geolocate it via freegeoip.

    Returns ((lat, lon), 0) on success (quality 0: weakest source), or
    None when no usable IP is found or the lookup fails.
    """
    received = message.get_all('Original-Received')
    if not received:
        # get_all returns None when the header is absent; iterating it
        # would raise TypeError.
        return None
    ips = [IP.findall(h) for h in received]
    # Drop private-range addresses; they say nothing about geography.
    ips = [ip[0] for ip in ips
           if ip and not ip[0].startswith("10.")
           and not ip[0].startswith("192.168")]
    if not ips:
        # Previously ips[-1] raised IndexError here, outside the try.
        return None
    likely = ips[-1]
    try:
        logger.info("geocoder: Getting location for %s" % (likely))
        url = "http://freegeoip.net/json/%s" % likely
        logger.debug("geocoder: Fetching %s" % (url))
        loc = json.loads(urllib2.urlopen(url).read())
        ll = float(loc['latitude']), float(loc['longitude'])
        if any(ll):
            return ll, 0
    except Exception:
        # Deliberate best-effort: network/JSON failures mean "unknown",
        # but don't swallow KeyboardInterrupt/SystemExit (bare except did).
        pass
def get_location():
    """Geocode the user's free-text GitHub location field.

    Returns (coordinates, quality) where quality is the index of the
    most specific matching type in ORDERED_TYPES (minimum 1, so github
    results outrank geoip), (None, None) on failure, or None when the
    user has no location set.
    """
    if not user.get("location", None):
        return None
    try:
        result = geocoder.geocode(user['location'])[0]
        coords = result.coordinates
        types = result.data['types']
        interesting_types = [t for t in types if t in ORDERED_TYPES]
        quality = max([ORDERED_TYPES.index(t) for t in interesting_types])
        if not quality:
            quality = 1  # We trust github more than geoip
        # repr() replaces the deprecated backtick syntax (removed in py3).
        logger.debug("geocoder: Getting coordinates for user %s at %s == %s (%s)"
                     % (user['login'], user['location'], repr(coords),
                        " ".join(interesting_types)))
        return coords, quality
    except Exception:
        # Geocoder failure or no interesting types (max() of an empty
        # list raises ValueError): signal "unknown" rather than crash.
        return None, None
def email_location():
    """Guess a location from the Original-Received chain of *message*.

    Picks the last non-private IP and looks it up on freegeoip.
    Returns ((lat, lon), 0) on success, else None.
    """
    headers = message.get_all('Original-Received')
    if not headers:
        # Header absent: get_all returns None, which the old code would
        # have tried to iterate (TypeError).
        return None
    ips = [IP.findall(h) for h in headers]
    ips = [
        ip[0] for ip in ips
        if ip and not ip[0].startswith("10.")
        and not ip[0].startswith("192.168")
    ]
    if not ips:
        # Avoid the IndexError the old ips[-1] raised on an empty list.
        return None
    likely = ips[-1]
    try:
        logger.info("geocoder: Getting location for %s" % (likely))
        url = "http://freegeoip.net/json/%s" % likely
        logger.debug("geocoder: Fetching %s" % (url))
        loc = json.loads(urllib2.urlopen(url).read())
        ll = float(loc['latitude']), float(loc['longitude'])
        if any(ll):
            return ll, 0
    except Exception:
        # Best-effort lookup; narrowed from a bare except so process
        # signals (KeyboardInterrupt/SystemExit) still propagate.
        pass
def get_users(self, token):
    """Collect every member login of the GitHub organisation (following
    Link-header pagination) and resolve them to User objects.

    :param token: GitHub API OAuth token.
    :return: result of _get_github_accounts for all collected logins.
    """
    url = "https://api.github.com/orgs/%s/members?access_token=%s" % (self.org, token)
    # Accumulate across pages. Initialising this inside the loop (as the
    # old code did) discarded every page except the last one.
    all_members = set()
    while True:
        logger.debug("github: getting %s" % url)
        members_resp = urllib2.urlopen(url)
        members = json.loads(members_resp.read())
        for member in members:
            all_members.add(member['login'])
        links = LINKS.findall(members_resp.headers.get('Link', ''))
        links = {link[1]: link[0] for link in links}
        if 'next' in links:
            logger.debug("github: %s has too many users, requesting more" % (self.org))
            url = links['next']
        else:
            # Report the accumulated total, not just the final page.
            logger.info("github: Got %s users for %s" % (len(all_members), self.org))
            break
    return self._get_github_accounts(all_members, token)
def group_info(self):
    """Switch the gmane connection to self.group and return the
    server's response for that group."""
    logger.debug("gmane: Switching to archive %s" % (self.group))
    return self.gmane.group(self.group)