def get_stats():
    stats = {}

    # load up our people pickle
    author_activity = cc.CortxActivity()
    all_people = cc.CortxCommunity()
    slack_people = cc.SlackCommunity()

    # init web client
    client = get_client()

    print("Getting channels")
    channels = get_channels(client, limit=None)
    for cname in channels.values():
        stats[cname] = {}
    stats[GLOBAL] = {}
    print(stats)

    print("Joining channels")
    join_channels(client, channels)

    print("Getting member lists")
    (all_members, active_members) = get_members(client, all_people, slack_people)
    print("%d members, %d active members" % (len(all_members), len(active_members)))
    stats[GLOBAL]['slack_members'] = all_members
    stats[GLOBAL]['slack_active_members'] = active_members

    print("Getting talkers lists")
    (all_talkers, weekly_talkers) = get_conversations(client, channels, slack_people, author_activity, stats)
    for cname in channels.values():
        stats[cname]['slack_participants'] = all_talkers[cname]
        stats[cname]['slack_weekly_participants'] = weekly_talkers[cname]
    stats[GLOBAL]['slack_participants'] = all_talkers[GLOBAL]
    stats[GLOBAL]['slack_weekly_participants'] = weekly_talkers[GLOBAL]
    print(stats)

    print("Getting member counts")
    member_counts = get_member_count(client, channels)
    print(member_counts)
    for cname in channels.values():
        stats[cname]['slack_member_count'] = member_counts[cname]
    stats[GLOBAL]['slack_member_count'] = member_counts[GLOBAL]

    return stats
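# get_client() is not defined in this file. A minimal sketch of what it might
# look like, assuming slack_sdk as the Slack client library and the bot token in
# a SLACK_TOKEN environment variable (both assumptions, not confirmed here):

import os
from slack_sdk import WebClient

def get_client():
    # assumed token source; the real helper may also configure retries, proxies, etc.
    return WebClient(token=os.environ['SLACK_TOKEN'])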
def get_activities(logins, company, people):
    activity = cortx_community.CortxActivity()
    activities = {}
    logins = get_logins(logins, people, company)
    for login in logins:
        activities[login] = {}
        try:
            # create a new structure to hold the data in an organization that is more easily sorted
            # go through the activity and save each item into the new format
            # problem is that the watch event doesn't have a date for it . . .
            for (url, created_at) in activity.get_activities(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass  # print("Login %s has no observed activity" % login)
    return (activities, logins)
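# Example usage of get_activities() (illustrative values: 'External' is one of
# the wildcard groups mentioned in the CLI help elsewhere in these scripts):

people = cortx_community.CortxCommunity()
(activities, logins) = get_activities('External', company=False, people=people)
for login in logins:
    for created_at, url in sorted(activities[login].items()):
        print(login, created_at, url)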
def collect_stats(update):
    gh = Github(os.environ.get('GH_OATH'))
    avoid_rate_limiting(gh)
    stx = gh.get_organization('Seagate')
    today = datetime.today().strftime('%Y-%m-%d')

    # averages are weird so handle them differently
    ave_age_str = '_ave_age_in_s'

    # the shared structure that we use for collecting stats
    global_stats = {
        'branches': 0,
        'clones_count_14_days': 0,
        'clones_unique_14_days': 0,
        'comments': 0,
        'commits': 0,
        'companies_contributing': set(),
        'companies': set(),
        'contributors': set(),
        'domains': set(),
        'downloads_releases': 0,
        'downloads_vms': 0,
        'email_addresses': set(),
        'external_comments': 0,
        'external_email_addresses': set(),
        'forks_external': set(),
        'forks': set(),
        'logins': set(),
        'new_external_activities': set(),
        'new_logins': set(),
        'pull_requests_external_merged': 0,
        'pull_requests_internal_merged': 0,
        'pull_requests_merged': 0,
        'seagate_blog_referrer_count': 0,
        'seagate_blog_referrer_uniques': 0,
        'seagate_referrer_count': 0,
        'seagate_referrer_uniques': 0,
        'stars_external': set(),
        'stars': set(),
        'top_paths': [],
        'top_referrers': [],
        'views_count_14_days': 0,
        'views_unique_14_days': 0,
        'watchers_external': set(),
        'watchers': set(),
    }
    load_actors(global_stats, ('mannequin', 'innersource', 'external', 'hackathon', 'bot', 'cortx_team', 'unknown'))
    load_items(global_stats, ('issues', 'pull_requests'), ('_external', '_internal', ''), ('', '_open', '_closed', '_open_ave_age_in_s', '_closed_ave_age_in_s'))
    global_stats['pull_requests_external_merged'] = 0

    local_stats_template = copy.deepcopy(global_stats)  # save an empty copy of the stats struct to copy for each repo

    author_activity = cortx_community.CortxActivity()     # load up the author activity pickle
    people = cortx_community.CortxCommunity()             # load up the people pickle
    persistent_stats = cortx_community.PersistentStats()  # load up all the stats

    for repo in stx.get_repos():
        rname = repo.name  # cache the name in a variable in case fetching it requires a GitHub API call
        if 'cortx' not in rname or rname.endswith('.old') or rname.endswith('-old') or repo.private:
            continue
        local_stats = copy.deepcopy(local_stats_template)  # get an empty copy of the stats structure

        # Use update if you just want to add some new data and don't want to wait for the very slow
        # scrape of all activity. Once you have finished the update, migrate the code out of the
        # update block. Typically we don't use update; only during development.
        # Note that update doesn't work for values that are incremented . . .
        if update:
            (cached_local_stats, timestamp) = persistent_stats.get_latest(rname)  # load the cached version
            print("Fetched %s data for %s" % (timestamp, repo))
            for k, v in cached_local_stats.items():
                local_stats[k] = v
        else:
            get_top_level_repo_info(local_stats, repo, people=people, author_activity=author_activity, gh=gh)
            get_issues_and_prs(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh)
            get_commits(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh)
            get_contributors(rname, repo, local_stats, people=people, gh=gh)
            # TODO: query when this last ran and then pass 'since' to get_commits

        # summarize info for this repo and persist the data structures
        summarize_consolidate(local_stats, global_stats, people=people, author_activity=author_activity, ave_age_str=ave_age_str)
        persist_author_activity(author_activity)
        persistent_stats.add_stats(date=today, repo=rname, stats=local_stats)
        persistent_stats.print_repo(rname, local_stats, date=today, verbose=False, csv=False)

    # do a bit of cleaning on global stats, then print and persist the consolidated numbers.
    # treat the 'ave_age_in_s' fields differently: each has a consistent name 'x_ave_age_in_s'
    # and a corresponding field x which holds the count, so divide the sum by the count
    for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
        item = ave_age[0:len(ave_age) - len(ave_age_str)]
        try:
            global_stats[ave_age] /= global_stats[item]
        except ZeroDivisionError:
            global_stats[ave_age] = 0
    global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

    persistent_stats.print_repo('GLOBAL', global_stats, date=today, verbose=False, csv=False)
    persistent_stats.add_stats(date=today, repo='GLOBAL', stats=global_stats)
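# avoid_rate_limiting() is defined elsewhere. A minimal sketch of one plausible
# implementation, using PyGithub's rate_limiting and rate_limiting_resettime
# attributes; the remaining-request threshold here is an arbitrary assumption:

import time

def avoid_rate_limiting(gh, Verbose=False):
    (remaining, limit) = gh.rate_limiting  # requests remaining, total limit
    if remaining < 100:  # assumed safety margin
        wait = max(gh.rate_limiting_resettime - time.time(), 0) + 5
        if Verbose:
            print("Only %d of %d API requests remaining; sleeping %d seconds" % (remaining, limit, wait))
        time.sleep(wait)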
def main():
    parser = argparse.ArgumentParser(
        description='Retrieve all activity done by a particular user.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'login',
        metavar='LOGIN',
        type=str,
        help="Comma-separated list of logins [can use External,Hackathon,EU R&D,Innersource,All,Unknown as wildcards]")
    parser.add_argument('-s', '--since', type=str, help="Only show activity since yyyy-mm-dd")
    parser.add_argument('-u', '--until', type=str, help="Only show activity until yyyy-mm-dd")
    parser.add_argument('-w', '--last_week', action='store_true', help="Only show activity in the last seven days")
    parser.add_argument('-m', '--last_month', action='store_true', help="Only show activity in the last 30 days")
    parser.add_argument('-d', '--details', action='store_true', help="Print stats for pulls and commits, also reports a total score")
    parser.add_argument('-c', '--company', action='store_true', help="Instead of looking up an individual, look up all folks from a particular company")
    parser.add_argument('-l', '--limit', type=int, help="Only show actions if gte to limit")
    parser.add_argument('-z', '--zero', action='store_true', help="Show folks even if they have no actions")
    args = parser.parse_args()

    activity = cortx_community.CortxActivity()
    people = cortx_community.CortxCommunity()

    if args.since:
        args.since = dateparser.parse(args.since)
    if args.until:
        args.until = dateparser.parse(args.until)
    if args.last_week:
        args.since = datetime.datetime.today() - datetime.timedelta(days=7)
    if args.last_month:
        args.since = datetime.datetime.today() - datetime.timedelta(days=30)

    daterange = "since %s" % (args.since.strftime('%Y-%m-%d') if args.since else "inception")
    if args.until:
        daterange += " until %s" % args.until.strftime('%Y-%m-%d')

    if args.details:
        gh = Github(os.environ.get('GH_OATH'))
        stx = gh.get_organization('Seagate')

    activities = {}
    logins = get_logins(args.login, people, args.company)
    for login in logins:
        activities[login] = {}
        try:
            # create a new structure to hold the data in an organization that is more easily sorted
            # go through the activity and save each item into the new format
            # problem is that the watch event doesn't have a date for it . . .
            for (url, created_at) in activity.get_activities(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass  # print("Login %s has no observed activity" % login)

    # using the new data structure, filter by since and until
    filtered_activities = {}
    if args.since or args.until:
        for login, actions in sorted(activities.items()):
            filtered_activities[login] = {}
            for d, u in sorted(actions.items()):
                if args.since and d < args.since:
                    continue
                if args.until and d > args.until:
                    continue
                filtered_activities[login][d] = u
    else:
        filtered_activities = activities

    # optionally filter by limit
    if args.limit:
        new_filtered = {}
        for login, actions in sorted(filtered_activities.items()):
            if len(actions) >= args.limit:
                new_filtered[login] = actions
        filtered_activities = new_filtered

    if len(logins) > 1:
        print("Getting activities from %d logins: %s" % (len(logins), sorted(logins)))

    # now print from the filtered list, most active logins first
    total_actions = 0
    for login in sorted(filtered_activities, key=lambda k: len(filtered_activities[k]), reverse=True):
        actions = filtered_activities[login]
        try:
            email = people.get_email(login)
            Type = people.get_type(login)
        except KeyError:
            email = None
            Type = None
        total_score = 0
        if len(actions) > 0 or args.zero:
            print("%d actions for %s [email %s, Type %s] %s" % (len(actions), login, email, Type, daterange))
        total_actions += len(actions)
        for d, u in sorted(actions.items()):
            if args.details:
                (points, details) = get_details(u, stx)
                total_score += points
            print("\t-- %s %s %s %s" % (login, d, u, details if args.details else ''))
        if len(actions) > 0 and args.details:
            print("\t%4.1f POINTS for %s" % (total_score, login))
    print("SUMMARY: %d total observed actions from %s %s" % (total_actions, args.login, daterange))
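# Standard entry point so the script can be run directly (assumed; the original
# file may already include this elsewhere):

if __name__ == '__main__':
    main()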
def main():
    parser = argparse.ArgumentParser(
        description='Retrieve all activity done by a particular user.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'login',
        metavar='LOGIN',
        type=str,
        help="Comma-separated list of logins [can use External,Hackathon,Innersource,All,Unknown as wildcards]")
    parser.add_argument('-s', '--since', type=str, help="Only show activity since yyyy-mm-dd")
    parser.add_argument('-u', '--until', type=str, help="Only show activity until yyyy-mm-dd")
    parser.add_argument('-l', '--last_week', action='store_true', help="Only show activity in the last seven days")
    parser.add_argument('-d', '--details', action='store_true', help="Print stats for pulls and commits, also reports a total score")
    args = parser.parse_args()

    activity = cortx_community.CortxActivity()

    if args.since:
        args.since = dateparser.parse(args.since)
    if args.until:
        args.until = dateparser.parse(args.until)
    if args.last_week:
        args.since = datetime.datetime.today() - datetime.timedelta(days=7)

    daterange = "since %s" % (args.since.strftime('%Y-%m-%d') if args.since else "inception")
    if args.until:
        daterange += " until %s" % args.until.strftime('%Y-%m-%d')

    if args.details:
        gh = Github(os.environ.get('GH_OATH'))
        stx = gh.get_organization('Seagate')

    activities = {}
    logins = get_logins(args.login)
    for login in logins:
        activities[login] = {}
        try:
            # create a new structure to hold the data in an organization that is more easily sorted
            # go through the activity and save each item into the new format
            # problem is that the watch event doesn't have a date for it . . .
            for (url, created_at) in activity.get_activity(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass  # print("Login %s has no observed activity" % login)

    # using the new data structure, filter by since and until
    filtered_activities = {}
    if args.since or args.until:
        for login, actions in sorted(activities.items()):
            filtered_activities[login] = {}
            for d, u in sorted(actions.items()):
                if args.since and d < args.since:
                    continue
                if args.until and d > args.until:
                    continue
                filtered_activities[login][d] = u
    else:
        filtered_activities = activities

    # now print from the filtered list
    total_actions = 0
    for login, actions in sorted(filtered_activities.items()):
        total_score = 0
        if len(actions) > 0:
            print("%d actions for %s %s" % (len(actions), login, daterange))
        total_actions += len(actions)
        for d, u in sorted(actions.items()):
            if args.details:
                (points, details) = get_details(u, stx)
                total_score += points
            print("\t%s %s %s %s" % (login, d, u, details if args.details else ''))
        if len(actions) > 0 and args.details:
            print("\t%4.1f POINTS for %s" % (total_score, login))
    print("SUMMARY: %d total observed actions from %s %s" % (total_actions, args.login, daterange))
def collect_stats(gh, org_name, update, prefix, top_only):
    avoid_rate_limiting(gh)
    today = datetime.today().strftime('%Y-%m-%d')

    # populate our persistent data structures from the pickles
    people = cortx_community.CortxCommunity(org_name)
    author_activity = cortx_community.CortxActivity(org_name)
    persistent_stats = cortx_community.PersistentStats(org_name)

    # averages are weird so handle them differently
    ave_age_str = '_ave_age_in_s'

    # the shared structure that we use for collecting stats
    global_stats = {
        'branches': 0,
        'clones_count_14_days': 0,
        'clones_unique_14_days': 0,
        'comments': 0,
        'commits': 0,
        'companies_contributing': set(),
        'companies': set(),
        'contributors': set(),
        'domains': set(),
        'downloads_releases': 0,
        'downloads_vms': 0,
        'email_addresses': set(),
        'external_comments': 0,
        'external_email_addresses': set(),
        'forks_external': set(),
        'forks': set(),
        'logins': set(),
        'new_external_activities': set(),
        'new_logins': set(),
        'pull_requests_external_merged': 0,
        'pull_requests_internal_merged': 0,
        'pull_requests_merged': 0,
        'seagate_blog_referrer_count': 0,
        'seagate_blog_referrer_uniques': 0,
        'seagate_referrer_count': 0,
        'seagate_referrer_uniques': 0,
        'stars_external': set(),
        'stars': set(),
        'top_paths': [],
        'top_referrers': [],
        'views_count_14_days': 0,
        'views_unique_14_days': 0,
        'watchers_external': set(),
        'watchers': set(),
    }
    load_actors(global_stats, people)
    load_items(global_stats, ('issues', 'pull_requests'), ('_external', '_internal', ''), ('', '_open', '_closed', '_open_ave_age_in_s', '_closed_ave_age_in_s'))

    local_stats_template = copy.deepcopy(global_stats)  # save an empty copy of the stats struct to copy for each repo

    for repo in cortx_community.get_repos(org_name=org_name, prefix=prefix):
        while True:  # retry loop: scrapes fail often (e.g., on rate limits), and retrying lets a full run succeed more often
            try:
                local_stats = copy.deepcopy(local_stats_template)  # get an empty copy of the stats structure
                rname = repo.name  # in case this requires a GitHub API call, fetch it once and reuse it

                # Use update if you just want to add some new data and don't want to wait for the very slow
                # scrape of all activity. Once you have finished the update, migrate the code out of the
                # update block. Typically we don't use update; only during development.
                # Note that update doesn't work for values that are incremented . . .
                if update:
                    (cached_local_stats, timestamp) = persistent_stats.get_latest(rname)  # load the cached version
                    print("Fetched %s data for %s" % (timestamp, repo))
                    for k, v in cached_local_stats.items():
                        local_stats[k] = v
                else:
                    get_top_level_repo_info(local_stats, repo, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                    get_contributors(rname, repo, local_stats, people=people, gh=gh, org_name=org_name)
                    if not top_only:
                        get_issues_and_prs(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                        get_commits(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                        # TODO: query when this last ran and then pass 'since' to get_commits

                # summarize info for this repo and persist the data structures
                summarize_consolidate(local_stats, global_stats, people=people, author_activity=author_activity, ave_age_str=ave_age_str)
                persist_author_activity(author_activity)
                persistent_stats.add_stats(date=today, repo=rname, stats=local_stats)
                persistent_stats.print_repo(rname, local_stats, date=today, verbose=False, csv=False)
                break
            except Exception as e:
                print("WTF: Failed while getting stats for repo %s" % repo.name, e)
                avoid_rate_limiting(gh, Verbose=True)

    # do a bit of cleaning on global stats, then print and persist the consolidated numbers.
    # treat the 'ave_age_in_s' fields differently: each has a consistent name 'x_ave_age_in_s'
    # and a corresponding field x which holds the count, so divide the sum by the count
    for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
        item = ave_age[0:len(ave_age) - len(ave_age_str)]
        try:
            global_stats[ave_age] /= global_stats[item]
        except ZeroDivisionError:
            global_stats[ave_age] = 0
    global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

    persistent_stats.print_repo('GLOBAL', global_stats, date=today, verbose=False, csv=False)
    persistent_stats.add_stats(date=today, repo='GLOBAL', stats=global_stats)
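# consolidate_referrers() is defined elsewhere. A minimal sketch of what it
# plausibly does, assuming each entry carries the referrer/count/uniques
# attributes that PyGithub's get_top_referrers() returns; the real
# implementation and its return type may differ:

from collections import defaultdict

def consolidate_referrers(referrers):
    # merge per-repo referrer entries by referrer name, summing the counters
    merged = defaultdict(lambda: {'count': 0, 'uniques': 0})
    for r in referrers:
        merged[r.referrer]['count'] += r.count
        merged[r.referrer]['uniques'] += r.uniques
    return dict(merged)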