def fetch():
    """Fetch a user record for every sampled repo owner and save the results.

    Loads the sample repos via ``Repo.load_sample()`` and the authors already
    stored in ``FILE`` via ``Author.load(FILE)``.  For each repo whose
    ``username`` is not yet known, requests ``/users/<username>`` through
    ``Elaborator._gh_request`` and stores an ``Author`` built from a fixed set
    of fields of the response.

    Progress is drawn with ``progress_bar``; the collected authors are written
    back to ``FILE`` at most every five minutes during the run and once more
    at the end.  Repos whose request raised are logged, collected, and listed
    on stdout after the loop.

    Returns nothing; the results are the contents of ``FILE`` and the console
    output.
    """
    sys.stdout.write('loading')
    sys.stdout.flush()

    repos = Repo.load_sample()
    # Authors from previous runs, keyed by login, so they are not re-fetched.
    # NOTE(review): new entries below are keyed by repo.username instead;
    # presumably that always equals the returned login -- confirm.
    authors = {author.login: author for author in Author.load(FILE)}

    # Fields copied from the user response into Author (example values on the
    # right).  Built once here because it does not change between iterations.
    user_fields = (
        'login',  # "octocat"
        'id',  # 1
        'avatar_url',  # "https://github.com/images/error/octocat_happy.gif"
        'gravatar_id',  # "somehexcode"
        'url',  # "https://api.github.com/users/octocat"
        'name',  # "monalisa octocat"
        'company',  # "GitHub"
        'blog',  # "https://github.com/blog"
        'location',  # "San Francisco"
        'email',  # "*****@*****.**"
        'hireable',  # false
        'bio',  # "There once was..."
        'public_repos',  # 2
        'public_gists',  # 1
        'followers',  # 20
        'following',  # 0
        'html_url',  # "https://github.com/octocat"
        'created_at',  # "2008-01-14T04:33:35Z"
        'type',  # "User"
    )

    total = len(repos)
    failures = []
    checkpoint_interval = datetime.timedelta(minutes=5)
    last_write = datetime.datetime.now()

    el = Elaborator()

    for seen, repo in enumerate(repos, 1):
        if repo.username in authors:
            logging.info("already fetched %s", repo.username)
            continue

        try:
            gh_data = el._gh_request(
                'GET',
                '/users/' + repo.username
            )
        except Exception:
            # The loop really needs to keep running past a failed request, but
            # catching only Exception lets KeyboardInterrupt / SystemExit
            # through so the job can still be stopped deliberately.
            logging.exception("problem! %s", repo)
            failures.append(repo)
            continue

        # Missing fields become None rather than raising.
        authors[repo.username] = Author(
            **{key: gh_data.get(key) for key in user_fields}
        )

        logging.info("fetched %s", repo.username)

        progress_bar(seen, total)

        # Checkpoint periodically so a crash does not lose the whole run.
        since_write = datetime.datetime.now() - last_write
        if since_write > checkpoint_interval:
            sys.stdout.write("\r(writing results)")
            sys.stdout.flush()

            Author.dump(authors.values(), FILE)

            last_write = datetime.datetime.now()

    # Single-argument parenthesised prints behave the same on Python 2 and 3.
    print("")  # from progress bar line

    if failures:
        print("%s failures:" % len(failures))
        for f in failures:
            print(" %s" % f)
        print("")

    print('writing out...')
    Author.dump(authors.values(), FILE)