def send_stats_to_graphite(): CARBON_SERVER = '127.0.0.1' CARBON_PORT = 2003 sock = socket() try: sock.connect((CARBON_SERVER, CARBON_PORT)) except: print 'Couldn\'t connect to %(server)s on port %(port)d, is carbon-agent.py running?' % { 'server': CARBON_SERVER, 'port': CARBON_PORT } return 1 now = time() lines = [] db = core.get_db() stats = { 'rate-limit': lambda: core.cache.get(key) or 0, 'num-users': lambda: db.users.find().count(), } for key, getter in stats.items(): lines.append('gitorama.{key} {value} {now}'.format(key=key, value=getter(), now=now)) message = '\n'.join(lines) + '\n' #all lines must end in a newline sock.sendall(message)
def index(username):
    """Render the profile page for *username*; 404 for unknown logins."""
    db = core.get_db()
    user = db.users.find_one({'login': username})
    if user is None:
        abort(404)
    # most recent stats snapshot for this user
    latest_stats = db.user_stats.find_one(
        {'login': username},
        sort=[('date', DESCENDING)],
    )
    return render_template('userprofile.html', user=user, stats=latest_stats)
def send_stats_to_graphite():
    # Push application metrics to a local carbon (Graphite) agent over
    # plain TCP, one plaintext-protocol line per metric.
    # Returns 1 when the agent is unreachable, None otherwise.
    CARBON_SERVER = '127.0.0.1'
    CARBON_PORT = 2003
    sock = socket()
    try:
        sock.connect((CARBON_SERVER, CARBON_PORT))
    except:
        # NOTE(review): bare except also swallows KeyboardInterrupt and
        # programming errors; socket.error would be the targeted choice.
        print 'Couldn\'t connect to %(server)s on port %(port)d, is carbon-agent.py running?' % { 'server':CARBON_SERVER, 'port':CARBON_PORT }
        return 1
    now = time()
    lines = []
    db = core.get_db()
    stats = {
        # NOTE(review): this lambda closes over the loop variable `key`
        # defined below (late binding) — it only works because each getter
        # is invoked during its own iteration of the loop.
        'rate-limit': lambda: core.cache.get(key) or 0,
        'num-users': lambda: db.users.find().count(),
    }
    for key, getter in stats.items():
        lines.append('gitorama.{key} {value} {now}'.format(
            key=key, value=getter(), now=now
        ))
    message = '\n'.join(lines) + '\n'  # all lines must end in a newline
    sock.sendall(message)
    # NOTE(review): the socket is never closed — relies on GC / process exit.
def fetch_user_events(login):
    # Fetch GitHub "received events" for *login* and store the new ones
    # in the received_events collection; pages newest-first and stops at
    # the first event id that is already saved.
    logger = logging.getLogger('events')
    logger.debug('fetching events for %s' % (login, ))
    db = get_db()
    user = db.users.find_one({'login': login})
    # calculating time of last saved event
    last_item = list(
        db.received_events.find({
            'gitorama.login': user['login']
        }).sort([('created_at', -1)])[:1])
    if last_item:
        last_item = last_item[0]['id']  # this is a GitHub's event id
    else:
        last_item = None
    gh = net.GitHub(token=user['gitorama']['token'])
    for event in gh.get_iter('/users/{0}/received_events'.format(
            user['login']), per_page=30):
        if event['id'] == last_item:
            # don't fetch more than needed
            # this item already saved
            break
        # normalize GitHub's timestamp before persisting
        event['created_at'] = times.to_universal(event['created_at'])
        # tag the event with the owning login so it can be queried per user
        event['gitorama'] = {'login': user['login']}
        db.received_events.save(event)
def update_user(login):
    # Refresh a user's GitHub profile and repositories, and upsert a
    # per-day stats snapshot. Uses flask.g to hold the db handle.
    g.db = core.get_db()
    now = times.now()
    # profile fields copied into the daily user_stats document
    stats_to_save = ('followers', 'following', 'disk_usage', 'public_repos')
    user = g.db.users.find_one({'login': login})
    # NOTE(review): find_one may return None for an unknown login —
    # TODO confirm callers guarantee the user exists.
    gh = net.GitHub(token=user['gitorama']['token'])
    # update user's data
    new_user_data = gh.get('/user')
    user.update(new_user_data)
    # schedule the next refresh
    user['gitorama']['update_at'] = now + app.config['USER_UPDATE_INTERVAL']
    g.db.users.save(user)
    # update users's repositories
    repositories = gh.get('/user/repos')
    for rep in repositories:
        # merge fresh data into the stored document (or a new empty one)
        rep_from_db = g.db.user_reps.find_one(
            { 'owner.login': rep['owner']['login'], 'name': rep['name'] }) or {}
        rep_from_db.update(rep)
        g.db.user_reps.save(rep_from_db)
    # one stats document per user per calendar day (midnight-truncated)
    today = datetime.datetime(now.year, now.month, now.day)
    key = dict(login=user['login'], date=today)
    stats = g.db.user_stats.find_one(key) or key
    # NOTE(review): the generator below reuses the name `key`, shadowing
    # the selector dict above — harmless here, but easy to misread.
    stats.update((key, value) for key, value in user.iteritems()
                 if key in stats_to_save)
    g.db.user_stats.save(stats)
def dbshell():
    """Open an interactive `mongo` shell connected to the current primary.

    Falls back to localhost:27017 when the server reports no primary
    (i.e. it is not part of a replica set).
    """
    db = core.get_db()
    result = db.connection.admin.command({'isMaster': 1})
    if 'primary' in result:
        # "host:port" string reported by the server
        host, port = result['primary'].split(':')
    else:
        host, port = 'localhost', 27017
    # Pass an argument list with the default shell=False: host/port come
    # from the server response and must not be interpolated into a shell
    # command string (quoting/injection hazard).
    subprocess.call(['mongo', '--host', str(host), '--port', str(port), db.name])
def migrate():
    """Rename the legacy `raw_events` collection and tag untagged events."""
    db = core.get_db()
    if 'raw_events' in db.collection_names():
        db.raw_events.rename('received_events')
    for event in db.received_events.find():
        if 'gitorama' in event:
            continue  # already tagged
        event['gitorama'] = {'login': '******'}
        db.received_events.save(event)
def is_all_mongos_are_up():
    """Return 0 when every replica-set member is healthy, 1 otherwise.

    A standalone server (no `setName` in the isMaster reply) counts as up.
    """
    db = core.get_db()
    status = db.connection.admin.command({'isMaster': 1})
    if 'setName' not in status:
        return 0  # standalone server — nothing more to check
    rs_status = db.connection.admin.command('replSetGetStatus')
    healthy_states = [1, 2, 7]  # PRIMARY, SECONDARY, ARBITER
    for member in rs_status['members']:
        if member['state'] not in healthy_states:
            return 1
    return 0
def cluster_reps_into_networks():
    # Group repositories into fork networks via GitHub's /forks endpoint.
    # NOTE(review): looks unfinished — the loop never assigns a net_id,
    # never advances `rep`, and drops into pdb; presumably work in
    # progress rather than production code.
    db = core.get_db()
    gh = net.GitHub()
    # repositories that have not been assigned to a network yet
    query = {'gitorama.net_id': {'$exists': False}}
    rep = db.user_reps.find(query).limit(1)
    # NOTE(review): `rep` is a cursor here, not a document — the format()
    # call below likely expects a document; TODO confirm intended usage.
    while rep:
        forks = gh.get('/repos/{0[owner][login]}/{0[name]}/forks'.format(rep))
        import pdb;pdb.set_trace()  # leftover debugger breakpoint
def dbshell():
    # Open an interactive `mongo` shell against the replica-set primary,
    # falling back to localhost:27017 for a standalone server.
    db = core.get_db()
    result = db.connection.admin.command({'isMaster': 1})
    if 'primary' in result:
        # "host:port" string reported by the server
        host, port = result['primary'].split(':')
    else:
        host, port = 'localhost', 27017
    # NOTE(review): host/port come from the server response and are
    # interpolated into a shell=True command string — acceptable for a
    # trusted local dev tool, but not injection-safe in general.
    subprocess.call(
        'mongo --host "{host}" --port "{port}" "{db.name}"'.format(**locals()),
        shell=True)
def run(self, debug=False):
    """Feed every pipeline's objects through its processor.

    When *debug* is true the processors run inline in this process;
    otherwise each call is enqueued onto an rq queue for a worker.
    """
    db = core.get_db()
    use_connection()
    queue = Queue()
    for list_getter, obj_processor in self.pipelines:
        for obj in list_getter(db):
            if debug:
                obj_processor(obj)
            else:
                queue.enqueue(obj_processor, obj)
def migrate():
    """Rebuild `received_events` so GitHub's event id becomes the `id` field.

    Copies every event through a temporary `backup_events` collection,
    moving Mongo's `_id` into `id`, then drops the backup.
    """
    db = core.get_db()
    if 'received_events' in db.collection_names():
        db.backup_events.insert(db.received_events.find(), safe=True)
        db.received_events.remove()
    for event in db.backup_events.find():
        event['id'] = event['_id']  # preserve the GitHub event id
        del event['_id']            # let Mongo assign a fresh _id on save
        db.received_events.save(event)
    db.backup_events.drop()
def update_relations(login):
    # Diff a user's current following/followers lists on GitHub against
    # the stored ones, and insert follow/unfollow events for the changes.
    from gitorama import app
    db = core.get_db()
    gh = net.GitHub()
    now = times.now()
    db.relations.ensure_index('login', unique=True)
    doc = db.relations.find_one({'login': login})
    # NOTE(review): doc may be None when no relations document exists yet —
    # the membership tests below would then raise TypeError; TODO confirm
    # callers pre-create the document.
    if 'following' not in doc and 'followers' not in doc:
        doc['following'] = []
        doc['followers'] = []
        # don't create events for the first run
        create_events = False
    else:
        create_events = True

    def process(handle, new_event, missing_event):
        """Process new users in following/followed lists.
        """
        old = set(doc[handle])
        # locals() here picks up both `handle` and the closed-over `login`
        new = gh.get('/users/{login}/{handle}'.format(**locals()))
        new = set(f['login'] for f in new)
        new_items = new - old
        absent_items = old - new
        if create_events:
            if new_items:
                db.events.insert(
                    (
                        {'login': login, 'e': new_event, 'who': who, 'date': now}
                        for who in new_items
                    ),
                    manipulate = False,
                )
            if absent_items:
                db.events.insert(
                    (
                        {'login': login, 'e': missing_event, 'who': who, 'date': now}
                        for who in absent_items
                    ),
                    manipulate = False,
                )
        # NOTE(review): doc[handle] is never replaced with the fresh list,
        # so the stored sets appear not to advance between runs — verify
        # whether that is intentional.

    process('following', 'follow', 'unfollow')
    process('followers', 'followed', 'unfollowed')
    # schedule the next refresh
    doc['update_at'] = now + app.config['USER_UPDATE_INTERVAL']
    db.relations.save(doc, manipulate=False, safe=True)
def migrate():
    # Apply every not-yet-applied migration module found in this package,
    # recording each applied name (with one shared timestamp) in the
    # `migrations` collection.
    now = datetime.datetime.utcnow()
    for filename in os.listdir(os.path.dirname(__file__)):
        # skip __init__.py and non-python files
        if not filename.startswith('__') and filename.endswith('.py'):
            migration_name = filename[:-3]
            if not is_applied(migration_name):
                print 'Applying "{0}" migration…'.format(migration_name)
                # migrations are sibling modules of this package
                module = importlib.import_module('.' + migration_name, __name__)
                module.migrate()
                db = core.get_db()
                db.migrations.save(dict(name=migration_name, migrated_at=now))
def create_fork_watches():
    """Seed `fork_watches` for users that don't have any yet.

    For every user with no existing watch documents, one watch is created
    per repository the user owns.
    """
    db = core.get_db()
    for user in db.users.find():
        # BUG FIX: the count used the misspelled collection `fork_warches`
        # (always empty), so watches were re-created on every run; query
        # the same `fork_watches` collection the documents are saved to.
        num_fork_watches = db.fork_watches.find(
            {'login': user['login']}
        ).count()
        if num_fork_watches == 0:
            for rep in db.user_reps.find({'owner.login': user['login']}):
                db.fork_watches.save(dict(
                    login=user['login'],
                    name=rep['name'],
                ))
def migrate_relations(following):
    """Merge one user's `following`/`followers` docs into `relations`.

    *following* is a document from the legacy `following` collection; the
    matching `followers` document is looked up by login. Both legacy
    documents are removed after the merged `relations` doc is saved.
    """
    db = core.get_db()
    followers = db.followers.find_one({'login': following['login']})
    db.relations.save(dict(
        login=following['login'],
        following=following['users'],
        followers=followers['users'],
        update_at=times.now(),
    ), safe=True
    )
    db.following.remove(following['_id'], safe=True)
    # BUG FIX: the followers doc lives in the `followers` collection but
    # was being removed from `following` (a no-op on a foreign _id),
    # leaving the legacy document orphaned.
    db.followers.remove(followers['_id'], safe=True)
def migrate(): now = datetime.datetime.utcnow() for filename in os.listdir(os.path.dirname(__file__)): if not filename.startswith('__') and filename.endswith('.py'): migration_name = filename[:-3] if not is_applied(migration_name): print 'Applying "{0}" migration…'.format(migration_name) module = importlib.import_module('.' + migration_name, __name__) module.migrate() db = core.get_db() db.migrations.save( dict(name=migration_name, migrated_at=now) )
def update_user(login):
    """Refresh a user's GitHub profile, repositories and daily stats."""
    g.db = core.get_db()
    now = times.now()
    # profile fields copied into the per-day stats snapshot
    stats_to_save = ('followers', 'following', 'disk_usage', 'public_repos')

    user = g.db.users.find_one({'login': login})
    gh = net.GitHub(token=user['gitorama']['token'])

    # refresh the profile itself and schedule the next update
    fresh_profile = gh.get('/user')
    user.update(fresh_profile)
    user['gitorama']['update_at'] = now + app.config['USER_UPDATE_INTERVAL']
    g.db.users.save(user)

    # refresh every repository the user owns
    for rep in gh.get('/user/repos'):
        selector = {
            'owner.login': rep['owner']['login'],
            'name': rep['name'],
        }
        stored_rep = g.db.user_reps.find_one(selector) or {}
        stored_rep.update(rep)
        g.db.user_reps.save(stored_rep)

    # upsert today's stats snapshot (one document per user per day)
    today = datetime.datetime(now.year, now.month, now.day)
    selector = dict(login=user['login'], date=today)
    stats = g.db.user_stats.find_one(selector) or selector
    stats.update(
        (field, value)
        for field, value in user.iteritems()
        if field in stats_to_save
    )
    g.db.user_stats.save(stats)
def update_reps_data():
    """Re-fetch GitHub data for repositories not refreshed in 24 hours."""
    db = core.get_db()
    gh = net.GitHub()
    timestamp = int(time.time())
    day_ago = timestamp - 24 * 3600
    stale_query = {
        '$or': [
            {'gitorama.updated_at': {'$exists': False}},
            {'gitorama.updated_at': {'$lt': day_ago}},
        ]
    }
    for rep in db.user_reps.find(stale_query):
        # pull the repository's current data and stamp the refresh time
        fresh_data = gh.get('/repos/{0[owner][login]}/{0[name]}'.format(rep))
        rep.update(fresh_data)
        rep.setdefault('gitorama', {})['updated_at'] = timestamp
        db.user_reps.save(rep)
def fetch_user_events(login):
    """Pull new GitHub received_events for *login* into Mongo.

    Pages newest-first and stops as soon as the most recently stored
    event id is encountered.
    """
    log = logging.getLogger('events')
    log.debug('fetching events for %s' % (login,))
    db = get_db()
    user = db.users.find_one({'login': login})

    # id of the most recently stored event for this user, if any
    newest = list(
        db.received_events
        .find({'gitorama.login': user['login']})
        .sort([('created_at', -1)])[:1]
    )
    newest_id = newest[0]['id'] if newest else None

    gh = net.GitHub(token=user['gitorama']['token'])
    url = '/users/{0}/received_events'.format(user['login'])
    for event in gh.get_iter(url, per_page=30):
        if event['id'] == newest_id:
            break  # everything from here on is already saved
        event['created_at'] = times.to_universal(event['created_at'])
        event['gitorama'] = {'login': user['login']}
        db.received_events.save(event)
def is_applied(migration_name):
    """Return True when *migration_name* is recorded in `migrations`."""
    db = core.get_db()
    return db.migrations.find_one({'name': migration_name}) is not None
def migrate():
    """Convert string `created_at` values in `raw_events` to datetimes."""
    db = core.get_db()
    for event in db.raw_events.find():
        created = event['created_at']
        if not isinstance(created, basestring):
            continue  # already converted
        event['created_at'] = times.to_universal(created)
        db.raw_events.save(event)
def update_digest(login, collection_name='daily_digests', period=1):
    # Build a per-repository activity digest for *login* from the last
    # *period* days of received events, rank repositories by how many
    # interesting events they got, and save the digest keyed by login.
    db = get_db()
    # Map-reduce: the map groups each event's payload under its type,
    # keyed by repository name; the reduce concatenates the per-type lists.
    result = db.received_events.inline_map_reduce(
        """
        function() {
            var doc = {types: {}};
            var obj = {};
            function copy_actor(item) {
                return {
                    login: item.actor.login,
                    gravatar_id: item.actor.gravatar_id,
                    url: item.actor.url
                }
            }
            if (this.type == 'WatchEvent') {
                obj.actor = copy_actor(this)
            } else if (this.type == 'ForkEvent') {
                obj.actor = copy_actor(this)
            } else if (this.type == 'GollumEvent') {
                obj.actor = copy_actor(this)
            } else if (this.type == 'PushEvent') {
                obj.actor = copy_actor(this)
                obj.commits = this.payload.commits
            } else if (this.type == 'PublicEvent') {
                obj.actor = copy_actor(this)
            } else if (this.type == 'IssuesEvent') {
                obj.actor = copy_actor(this)
                obj.action = this.payload.action
            } else {
                if (this.type != 'CreateEvent' && this.type != 'MemberEvent') {
                    obj = 'unknown event'
                }
            }
            doc.types[this.type] = [obj];
            emit(this.repo.name, doc);
        }
        """,
        """
        function (key, values) {
            var result = {types: {}};
            values.forEach(function(value) {
                for (var key in value.types) {
                    if (result.types[key] === undefined) {
                        result.types[key] = [];
                    }
                    result.types[key] = result.types[key].concat(value.types[key])
                }
            });
            return result;
        }
        """,
        # only this user's events from the digest period; '/' filters out
        # events with no real repository name
        query={
            'gitorama.login': login,
            'created_at': {'$gt': times.now() - datetime.timedelta(period)},
            'repo.name': {'$ne': '/'}
        },
    )
    repositories = []
    for item in result:
        name = item['_id']
        events = item['value']['types']
        score = 0  # total count of interesting events for this repo
        good_events = {}
        for event_name, event_data in events.items():
            if event_data[0] == 'unknown event':
                print 'Unknown', event_name, 'in', name
            # keep only the event types the digest template knows about
            if event_name in ('ForkEvent', 'PushEvent', 'GollumEvent',
                              'WatchEvent', 'IssuesEvent'):
                good_events[event_name] = event_data
                score += len(event_data)
        if good_events:
            repositories.append(
                dict(name=name, events=good_events, score=score)
            )
    # most active repositories first
    repositories.sort(key=lambda x: x['score'], reverse=True)
    digest = dict(repositories=repositories)
    digest['_id'] = login  # one digest document per user
    digest['update_at'] = times.now() + datetime.timedelta(1)
    db[collection_name].save(digest)
def update_digest(login, collection_name='daily_digests', period=1): db = get_db() result = db.received_events.inline_map_reduce( """ function() { var doc = {types: {}}; var obj = {}; function copy_actor(item) { return { login: item.actor.login, gravatar_id: item.actor.gravatar_id, url: item.actor.url } } if (this.type == 'WatchEvent') { obj.actor = copy_actor(this) } else if (this.type == 'ForkEvent') { obj.actor = copy_actor(this) } else if (this.type == 'GollumEvent') { obj.actor = copy_actor(this) } else if (this.type == 'PushEvent') { obj.actor = copy_actor(this) obj.commits = this.payload.commits } else if (this.type == 'PublicEvent') { obj.actor = copy_actor(this) } else if (this.type == 'IssuesEvent') { obj.actor = copy_actor(this) obj.action = this.payload.action } else { if (this.type != 'CreateEvent' && this.type != 'MemberEvent') { obj = 'unknown event' } } doc.types[this.type] = [obj]; emit(this.repo.name, doc); } """, """ function (key, values) { var result = {types: {}}; values.forEach(function(value) { for (var key in value.types) { if (result.types[key] === undefined) { result.types[key] = []; } result.types[key] = result.types[key].concat(value.types[key]) } }); return result; } """, query={ 'gitorama.login': login, 'created_at': { '$gt': times.now() - datetime.timedelta(period) }, 'repo.name': { '$ne': '/' } }, ) repositories = [] for item in result: name = item['_id'] events = item['value']['types'] score = 0 good_events = {} for event_name, event_data in events.items(): if event_data[0] == 'unknown event': print 'Unknown', event_name, 'in', name if event_name in ('ForkEvent', 'PushEvent', 'GollumEvent', 'WatchEvent', 'IssuesEvent'): good_events[event_name] = event_data score += len(event_data) if good_events: repositories.append( dict(name=name, events=good_events, score=score)) repositories.sort(key=lambda x: x['score'], reverse=True) digest = dict(repositories=repositories) digest['_id'] = login digest['update_at'] = times.now() + 
datetime.timedelta(1) db[collection_name].save(digest)