def fetch_user_events(login):
    """Download the user's new GitHub "received events" and store them.

    The newest event already stored for the user (highest ``created_at``)
    marks where the previous run stopped; the feed is read until that event
    is met again, and everything before it is saved to ``received_events``.

    New events are buffered and written oldest-first.  Writing them in feed
    order would store the newest event first, so an interrupted run would
    make the next run stop immediately and silently skip the older events
    that had not been written yet.

    :param login: GitHub login of a user stored in the ``users`` collection.

    NOTE(review): if no user document matches ``login``, ``user`` is
    presumably ``None`` and the lookups below fail -- confirm callers only
    pass known logins.
    """
    logger = logging.getLogger('events')
    logger.debug('fetching events for %s', login)

    db = get_db()
    user = db.users.find_one({'login': login})

    # Find the most recent event already stored for this user.
    newest_saved = list(
        db.received_events.find({'gitorama.login': user['login']})
        .sort([('created_at', -1)])[:1])
    # This is GitHub's event id, not a database id.
    last_id = newest_saved[0]['id'] if newest_saved else None

    gh = net.GitHub(token=user['gitorama']['token'])
    url = '/users/{0}/received_events'.format(user['login'])

    # The early break assumes the feed is ordered newest-first.
    new_events = []
    for event in gh.get_iter(url, per_page=30):
        if event['id'] == last_id:
            # This item is already saved; don't fetch more than needed.
            break
        event['created_at'] = times.to_universal(event['created_at'])
        event['gitorama'] = {'login': user['login']}
        new_events.append(event)

    # Oldest first, so a partial write never hides events that are unsaved.
    for event in reversed(new_events):
        db.received_events.save(event)
def update_user(login):
    """Refresh one user's profile, repositories and daily stats from GitHub.

    Steps, in order:

    1. Merge the ``/user`` response into the stored user document, schedule
       the next update (``gitorama.update_at``) and save it.
    2. For every repository returned by ``/user/repos``, merge it into the
       matching ``user_reps`` document (matched on owner login and name),
       or into a new document when none matches, and save it.
    3. Copy the tracked counters from the user document into today's
       ``user_stats`` document and save it.

    The database handle is also stored on ``g.db``.

    :param login: GitHub login of a user stored in the ``users`` collection.
    """
    tracked_fields = ('followers', 'following', 'disk_usage', 'public_repos')

    g.db = core.get_db()
    now = times.now()

    user = g.db.users.find_one({'login': login})
    gh = net.GitHub(token=user['gitorama']['token'])

    # 1. The user's own data.
    user.update(gh.get('/user'))
    user['gitorama']['update_at'] = now + app.config['USER_UPDATE_INTERVAL']
    g.db.users.save(user)

    # 2. The user's repositories.
    for fresh_rep in gh.get('/user/repos'):
        lookup = {
            'owner.login': fresh_rep['owner']['login'],
            'name': fresh_rep['name'],
        }
        stored_rep = g.db.user_reps.find_one(lookup)
        if not stored_rep:
            stored_rep = {}
        stored_rep.update(fresh_rep)
        g.db.user_reps.save(stored_rep)

    # 3. One stats document per user per day, keyed by midnight of today.
    day_start = datetime.datetime(now.year, now.month, now.day)
    stats_key = dict(login=user['login'], date=day_start)
    stats = g.db.user_stats.find_one(stats_key) or stats_key
    for field, value in user.iteritems():
        if field in tracked_fields:
            stats[field] = value
    g.db.user_stats.save(stats)
def cluster_reps_into_networks():
    """Unfinished routine; not safe to call outside a debugging session.

    As written it selects ``user_reps`` entries that have no
    ``gitorama.net_id`` field, requests the ``/forks`` URL built from the
    selection, and then drops into the interactive debugger.  Nothing is
    written back to the database.

    NOTE(review): judging by the name, this is meant to group repositories
    into fork networks -- the grouping logic itself is not implemented yet.
    """
    db = core.get_db()
    gh = net.GitHub()
    # Match repositories that have not been assigned a network id yet.
    query = {'gitorama.net_id': {'$exists': False}}
    # NOTE(review): find(...).limit(1) presumably yields a cursor rather than
    # a single document, while the code below indexes `rep` like a document
    # (rep[owner][login], rep[name]) -- confirm whether find_one was intended.
    rep = db.user_reps.find(query).limit(1)
    # `rep` is never reassigned inside the loop, so the condition cannot
    # change from one iteration to the next.
    while rep:
        # `forks` is fetched but not used anywhere below.
        forks = gh.get('/repos/{0[owner][login]}/{0[name]}/forks'.format(rep))
        # Leftover interactive breakpoint: blocks on stdin when reached.
        import pdb;pdb.set_trace()
def update_relations(login):
    """Sync a user's following/followers lists and record the changes.

    For each of the two lists, the logins currently reported by GitHub are
    compared with the logins stored in the user's ``relations`` document.
    Every added or removed login produces one document in ``events``
    (``follow``/``unfollow`` for the following list, ``followed``/
    ``unfollowed`` for the followers list).  The fetched list is then
    stored back so the next run compares against it, and the next update
    time is written to ``update_at``.

    No events are produced for a list that has never been stored before
    (first run), because every login in it would look new.

    :param login: GitHub login whose relations are synchronised.  If no
        ``relations`` document exists for it yet, a new one is created.

    NOTE(review): ``gh.get`` is used here while ``gh.get_iter`` exists for
    paged feeds -- confirm ``gh.get`` returns the complete list.
    """
    # Imported here rather than at module level, as in the original code.
    from gitorama import app

    db = core.get_db()
    gh = net.GitHub()
    now = times.now()

    db.relations.ensure_index('login', unique=True)
    # A missing document is treated as a first run instead of failing.
    doc = db.relations.find_one({'login': login}) or {'login': login}

    def process(handle, new_event, missing_event):
        """Diff one relation list against GitHub and store the new state."""
        # Decided per list, so a document holding only one of the two lists
        # no longer fails with KeyError on the other.
        first_run = handle not in doc
        old = set(doc.get(handle, ()))

        fetched = gh.get(
            '/users/{login}/{handle}'.format(login=login, handle=handle))
        new = set(f['login'] for f in fetched)

        if not first_run:
            changes = (
                (new_event, new - old),
                (missing_event, old - new),
            )
            for event_name, logins in changes:
                if logins:
                    db.events.insert(
                        [
                            {'login': login, 'e': event_name,
                             'who': who, 'date': now}
                            for who in logins
                        ],
                        manipulate=False,
                    )

        # Persist the current state; without this every later run would
        # compare against a stale list and report the same logins again.
        doc[handle] = sorted(new)

    process('following', 'follow', 'unfollow')
    process('followers', 'followed', 'unfollowed')

    doc['update_at'] = now + app.config['USER_UPDATE_INTERVAL']
    db.relations.save(doc, manipulate=False, safe=True)
def update_reps_data():
    """Re-download data for repositories not refreshed in the last 24 hours.

    A repository qualifies when its ``gitorama.updated_at`` field is missing
    or older than 24 hours before the start of this run.  Each qualifying
    ``user_reps`` document is merged with the ``/repos/<owner>/<name>``
    response, stamped with the run's timestamp and saved.
    """
    db = core.get_db()
    gh = net.GitHub()
    timestamp = int(time.time())

    # One timestamp for the whole run: used both as the staleness cutoff
    # base and as the value written to every refreshed document.
    stale_before = timestamp - 24 * 3600
    never_updated = {'gitorama.updated_at': {'$exists': False}}
    outdated = {'gitorama.updated_at': {'$lt': stale_before}}

    for rep in db.user_reps.find({'$or': [never_updated, outdated]}):
        url = '/repos/{0[owner][login]}/{0[name]}'.format(rep)
        rep.update(gh.get(url))
        # Create the bookkeeping sub-document when the repository lacks one.
        meta = rep.setdefault('gitorama', {})
        meta['updated_at'] = timestamp
        db.user_reps.save(rep)