def main():
    parser = argparse.ArgumentParser(
        description='Set a value for CORTX Community.',
        add_help=False,
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    required = parser.add_argument_group('required arguments')
    optional = parser.add_argument_group('optional arguments')
    optional.add_argument('-h', '--help', action='help', help='show this help message and exit')
    required.add_argument('--key', '-k', type=str, help="Which key to set / query", required=True)
    optional.add_argument('--value', '-v', type=str, help="Which value to set", required=False)
    optional.add_argument('--date', '-d', type=str, help='Which date to set', required=False)
    optional.add_argument('--org', '-o', help='Which org', default='Seagate')
    optional.add_argument('--repo', '-r', help='Which repo', default='GLOBAL')
    optional.add_argument('--verbose', '-V', help='Do not compress array values into a number',
                          action='store_true', default=False, required=False)
    args = parser.parse_args()

    repo = args.repo
    org = args.org
    key = args.key
    val = args.value
    date = args.date

    ps = cortx_community.PersistentStats(org_name=args.org)
    dates = ps.get_dates(args.repo)

    if date is None:
        date = dates[-1]
        print("Defaulting to use last valid date %s" % date)

    if val is not None:
        ps.add_stat(date=date, repo=repo, stat=key, value=int(val))
        print("Changing %s on %s to be %s" % (repo, date, val))

    for d in dates:
        if args.verbose:
            print(d, args.key, ps.get_values(args.repo, args.key, [d]))
        else:
            print(d, args.key, ps.get_values_as_numbers(args.repo, args.key, [d]))
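# Illustrative sketch (not part of the script above): the --verbose help text says
# "Do not compress array values into a number", so get_values_as_numbers() is assumed
# to collapse list/set values to their length while get_values() returns them as-is.
# A minimal standalone demonstration of that assumed difference:
def _as_number(value):
    # collapse container values to a count; pass scalars through unchanged
    return len(value) if isinstance(value, (list, set, dict)) else value

_example = {'contributors': {'alice', 'bob'}, 'commits': 42}
print({k: _as_number(v) for k, v in _example.items()})  # default output: {'contributors': 2, 'commits': 42}
print(_example)                                          # --verbose output: raw values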
def merge_stats(slack_stats):
    ps = cc.PersistentStats()
    repos = ps.get_repos()
    for channel in slack_stats.keys():
        try:
            repo = channel_repo_map[channel]
        except KeyError:
            repo = channel
        assert repo in repos, "Do not have a repo into which to merge %s" % channel
        print("Can %s merge channel %s into corresponding repo" % ('' if repo in repos else 'not', channel))
        merge_stat(slack_stats[channel], ps, repo)
    ps.persist()  # probably unnecessary since ps.add_stat does an internal persist
    return ps
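# merge_stats() above depends on a channel_repo_map dict and a merge_stat() helper defined
# elsewhere; the shapes below are assumptions for illustration only. The add_stat() call
# mirrors the signature used earlier in this section.
channel_repo_map_example = {
    'motr-dev': 'cortx-motr',  # hypothetical Slack-channel-to-repo mapping
}

def merge_stat_example(channel_stats, ps, repo):
    # one plausible shape: fold each per-date Slack metric into the repo's persisted stats,
    # assuming channel_stats looks like {date: {metric_name: value}}
    for date, stats in channel_stats.items():
        for key, value in stats.items():
            ps.add_stat(date=date, repo=repo, stat='slack_%s' % key, value=value)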
def collect_stats(update):
    gh = Github(os.environ.get('GH_OATH'))
    avoid_rate_limiting(gh)
    stx = gh.get_organization('Seagate')
    today = datetime.today().strftime('%Y-%m-%d')

    # averages are weird so handle them differently
    ave_age_str = '_ave_age_in_s'

    # the shared structure that we use for collecting stats
    global_stats = {'branches'                      : 0,
                    'clones_count_14_days'          : 0,
                    'clones_unique_14_days'         : 0,
                    'comments'                      : 0,
                    'commits'                       : 0,
                    'companies_contributing'        : set(),
                    'companies'                     : set(),
                    'contributors'                  : set(),
                    'domains'                       : set(),
                    'downloads_releases'            : 0,
                    'downloads_vms'                 : 0,
                    'email_addresses'               : set(),
                    'external_comments'             : 0,
                    'external_email_addresses'      : set(),
                    'forks_external'                : set(),
                    'forks'                         : set(),
                    'logins'                        : set(),
                    'new_external_activities'       : set(),
                    'new_logins'                    : set(),
                    'pull_requests_external_merged' : 0,
                    'pull_requests_internal_merged' : 0,
                    'pull_requests_merged'          : 0,
                    'seagate_blog_referrer_count'   : 0,
                    'seagate_blog_referrer_uniques' : 0,
                    'seagate_referrer_count'        : 0,
                    'seagate_referrer_uniques'      : 0,
                    'stars_external'                : set(),
                    'stars'                         : set(),
                    'top_paths'                     : [],
                    'top_referrers'                 : [],
                    'views_count_14_days'           : 0,
                    'views_unique_14_days'          : 0,
                    'watchers_external'             : set(),
                    'watchers'                      : set(),
                    }
    load_actors(global_stats, ('mannequin', 'innersource', 'external', 'hackathon', 'bot', 'cortx_team', 'unknown'))
    load_items(global_stats, ('issues', 'pull_requests'),
               ('_external', '_internal', ''),
               ('', '_open', '_closed', '_open_ave_age_in_s', '_closed_ave_age_in_s'))
    global_stats['pull_requests_external_merged'] = 0
    local_stats_template = copy.deepcopy(global_stats)  # save an empty copy of the stats struct to copy for each repo

    author_activity = cortx_community.CortxActivity()     # load up the author activity pickle
    people = cortx_community.CortxCommunity()             # load up the people pickle
    persistent_stats = cortx_community.PersistentStats()  # load up all the stats

    for repo in stx.get_repos():
        rname = repo.name  # put this in a variable just in case it is a github API call to fetch it
        if 'cortx' not in rname or rname.endswith('.old') or rname.endswith('-old') or repo.private:
            continue
        local_stats = copy.deepcopy(local_stats_template)  # get an empty copy of the stats structure

        # Use update if you just want to add some new data and don't want to wait for the very slow
        # scrape of all activity. Once you have finished the update, migrate the code out of the update block.
        # Typically we don't use update; only during development.
        # Note that update doesn't work for values that are incremented . . .
        if update:
            (cached_local_stats, timestamp) = persistent_stats.get_latest(rname)  # load the cached version
            print("Fetched %s data for %s" % (timestamp, repo))
            for k, v in cached_local_stats.items():
                local_stats[k] = v
        else:
            get_top_level_repo_info(local_stats, repo, people=people, author_activity=author_activity, gh=gh)
            get_issues_and_prs(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh)
            get_commits(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh)
            get_contributors(rname, repo, local_stats, people=people, gh=gh)
            # what we need to do is query when the last time this ran and then pass 'since' to get_commits

        # summarize info for this repo and persist the data structures
        summarize_consolidate(local_stats, global_stats, people=people, author_activity=author_activity, ave_age_str=ave_age_str)
        persist_author_activity(author_activity)
        persistent_stats.add_stats(date=today, repo=rname, stats=local_stats)
        persistent_stats.print_repo(rname, local_stats, date=today, verbose=False, csv=False)

    # do a bit of cleaning on global stats, then print and persist the consolidated stats
    # treat the 'ave_age_in_s' fields differently:
    # all those fields have consistent names: 'x_ave_age_in_s'
    # also, there will always be a corresponding field x which is the count
    for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
        item = ave_age[0:len(ave_age) - len(ave_age_str)]
        try:
            global_stats[ave_age] /= global_stats[item]
        except ZeroDivisionError:
            global_stats[ave_age] = 0
    global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

    persistent_stats.print_repo('GLOBAL', global_stats, date=today, verbose=False, csv=False)
    persistent_stats.add_stats(date=today, repo='GLOBAL', stats=global_stats)
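# Worked example of the '_ave_age_in_s' cleanup above. During collection the
# '<x>_ave_age_in_s' fields are assumed to hold a running total of ages in seconds
# while the matching field '<x>' holds the item count, so the final average is
# total / count, with an empty count mapping to an average of 0.
_stats = {'issues_open': 4, 'issues_open_ave_age_in_s': 400000,
          'issues_closed': 0, 'issues_closed_ave_age_in_s': 0}
for _k in [k for k in _stats if k.endswith('_ave_age_in_s')]:
    _count_key = _k[:-len('_ave_age_in_s')]
    try:
        _stats[_k] /= _stats[_count_key]
    except ZeroDivisionError:
        _stats[_k] = 0
print(_stats['issues_open_ave_age_in_s'])    # 100000.0
print(_stats['issues_closed_ave_age_in_s'])  # 0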
def main():
    parser = argparse.ArgumentParser(
        description='Print the latest statistics for CORTX Community.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true', help="Show all data")
    parser.add_argument('-a', '--all', action='store_true', help="Show all repos (i.e. not just summary)")
    parser.add_argument('-s', '--suppress', action='store_true', help="Don't show interesting fields")
    parser.add_argument('-i', '--individual', type=str, help="Only show data for a single repo")
    parser.add_argument('-c', '--csv', action='store_true', help="Output in csv")
    args = parser.parse_args()

    ps = cortx_community.PersistentStats()
    repos = sorted([repo for repo in ps.get_repos() if repo != 'GLOBAL'])

    # averages are weird so handle them differently
    ave_age_str = '_ave_age_in_s'

    # first build the global stats
    gstats = {}
    gstats['repo_count'] = len(repos)
    timestamp = None
    for repo in repos:
        (rstats, timestamp) = ps.get_latest(repo)
        for k, v in rstats.items():
            if isinstance(v, int) or isinstance(v, float):
                if k not in gstats:
                    gstats[k] = 0
                if ave_age_str in k:
                    # treat averages differently: accumulate the total value here and,
                    # down below, adjust by count to get the actual average
                    item = k[0:len(k) - len(ave_age_str)]
                    count = rstats[item]
                    gstats[k] += (v * count)
                else:
                    gstats[k] += v
            elif isinstance(v, set):
                if k not in gstats:
                    gstats[k] = set()
                gstats[k] |= v
            elif isinstance(v, list):
                if k not in gstats:
                    gstats[k] = []
                gstats[k] += v
            else:
                raise TypeError("%s has unknown type %s" % (k, type(v)))

    # top referrers is a bit weird so clean that one up specifically here
    gstats['top_referrers'] = consolidate_referrers(gstats['top_referrers'])

    # ugh, there is a problem with the average age fields
    # all those fields have consistent names: 'x_ave_age_in_s'
    # also, there will always be a corresponding field x which is the count
    for ave_age in [key for key in gstats.keys() if ave_age_str in key]:
        item = ave_age[0:len(ave_age) - len(ave_age_str)]
        try:
            gstats[ave_age] /= gstats[item]
        except ZeroDivisionError:
            gstats[ave_age] = 0

    # remove some bullshit companies
    for bs_company in ('Seagate', 'Codacy', 'Dependabot'):
        for k in ['companies', 'companies_contributing']:
            try:
                gstats[k].remove(bs_company)
            except KeyError:
                pass

    if args.individual:
        (repo, timestamp) = ps.get_latest(args.individual)
        ps.print_repo(args.individual, repo, timestamp, verbose=args.verbose, csv=args.csv)
        if not args.suppress:
            print_interesting_arrays(args.individual, repo)
    else:
        ps.print_repo('GLOBAL', gstats, timestamp, verbose=args.verbose, csv=args.csv)
        if not args.suppress:
            print_interesting_arrays('GLOBAL', gstats)
        if args.all:
            for repo in repos:
                (rstats, timestamp) = ps.get_latest(repo)
                ps.print_repo(repo, rstats, timestamp, verbose=args.verbose, csv=args.csv)
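# consolidate_referrers() is used above but not defined in this section; a minimal
# sketch of the assumed behavior -- merging duplicate referrer entries and summing
# their counts -- purely for illustration (the field names are assumptions):
def consolidate_referrers_sketch(referrers):
    merged = {}
    for ref in referrers:
        entry = merged.setdefault(ref['referrer'], {'referrer': ref['referrer'], 'count': 0, 'uniques': 0})
        entry['count'] += ref['count']
        entry['uniques'] += ref['uniques']  # rough upper bound; uniques don't strictly add
    return sorted(merged.values(), key=lambda r: r['count'], reverse=True)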
def collect_stats(gh, org_name, update, prefix, top_only):
    avoid_rate_limiting(gh)
    today = datetime.today().strftime('%Y-%m-%d')

    # populate our persistent data structures from the pickles
    people = cortx_community.CortxCommunity(org_name)
    author_activity = cortx_community.CortxActivity(org_name)
    persistent_stats = cortx_community.PersistentStats(org_name)

    # averages are weird so handle them differently
    ave_age_str = '_ave_age_in_s'

    # the shared structure that we use for collecting stats
    global_stats = {'branches'                      : 0,
                    'clones_count_14_days'          : 0,
                    'clones_unique_14_days'         : 0,
                    'comments'                      : 0,
                    'commits'                       : 0,
                    'companies_contributing'        : set(),
                    'companies'                     : set(),
                    'contributors'                  : set(),
                    'domains'                       : set(),
                    'downloads_releases'            : 0,
                    'downloads_vms'                 : 0,
                    'email_addresses'               : set(),
                    'external_comments'             : 0,
                    'external_email_addresses'      : set(),
                    'forks_external'                : set(),
                    'forks'                         : set(),
                    'logins'                        : set(),
                    'new_external_activities'       : set(),
                    'new_logins'                    : set(),
                    'pull_requests_external_merged' : 0,
                    'pull_requests_internal_merged' : 0,
                    'pull_requests_merged'          : 0,
                    'seagate_blog_referrer_count'   : 0,
                    'seagate_blog_referrer_uniques' : 0,
                    'seagate_referrer_count'        : 0,
                    'seagate_referrer_uniques'      : 0,
                    'stars_external'                : set(),
                    'stars'                         : set(),
                    'top_paths'                     : [],
                    'top_referrers'                 : [],
                    'views_count_14_days'           : 0,
                    'views_unique_14_days'          : 0,
                    'watchers_external'             : set(),
                    'watchers'                      : set(),
                    }
    load_actors(global_stats, people)
    load_items(global_stats, ('issues', 'pull_requests'),
               ('_external', '_internal', ''),
               ('', '_open', '_closed', '_open_ave_age_in_s', '_closed_ave_age_in_s'))
    local_stats_template = copy.deepcopy(global_stats)  # save an empty copy of the stats struct to copy for each repo

    for repo in cortx_community.get_repos(org_name=org_name, prefix=prefix):
        while True:  # retry loop: the scrape fails frequently, so keep retrying this repo until it succeeds
            try:
                local_stats = copy.deepcopy(local_stats_template)  # get an empty copy of the stats structure
                rname = repo.name  # just in case this requires a github API call, fetch it once and reuse it

                # Use update if you just want to add some new data and don't want to wait for the very slow
                # scrape of all activity. Once you have finished the update, migrate the code out of the update block.
                # Typically we don't use update; only during development.
                # Note that update doesn't work for values that are incremented . . .
                if update:
                    (cached_local_stats, timestamp) = persistent_stats.get_latest(rname)  # load the cached version
                    print("Fetched %s data for %s" % (timestamp, repo))
                    for k, v in cached_local_stats.items():
                        local_stats[k] = v
                else:
                    get_top_level_repo_info(local_stats, repo, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                    get_contributors(rname, repo, local_stats, people=people, gh=gh, org_name=org_name)
                    if not top_only:
                        get_issues_and_prs(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                        get_commits(rname, repo, local_stats, people=people, author_activity=author_activity, gh=gh, org_name=org_name)
                        # what we need to do is query when the last time this ran and then pass 'since' to get_commits

                # summarize info for this repo and persist the data structures
                summarize_consolidate(local_stats, global_stats, people=people, author_activity=author_activity, ave_age_str=ave_age_str)
                persist_author_activity(author_activity)
                persistent_stats.add_stats(date=today, repo=rname, stats=local_stats)
                persistent_stats.print_repo(rname, local_stats, date=today, verbose=False, csv=False)
                break
            except Exception as e:
                print("WTF: Failed while getting stats for repo %s" % repo.name, e)
                avoid_rate_limiting(gh, Verbose=True)

    # do a bit of cleaning on global stats, then print and persist the consolidated stats
    # treat the 'ave_age_in_s' fields differently:
    # all those fields have consistent names: 'x_ave_age_in_s'
    # also, there will always be a corresponding field x which is the count
    for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
        item = ave_age[0:len(ave_age) - len(ave_age_str)]
        try:
            global_stats[ave_age] /= global_stats[item]
        except ZeroDivisionError:
            global_stats[ave_age] = 0
    global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

    persistent_stats.print_repo('GLOBAL', global_stats, date=today, verbose=False, csv=False)
    persistent_stats.add_stats(date=today, repo='GLOBAL', stats=global_stats)
def main():
    parser = argparse.ArgumentParser(
        description='Print the latest statistics for CORTX Community.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('-v', '--verbose', action='store_true', help="Show all data")
    parser.add_argument('-a', '--all', action='store_true', help="Show all repos (i.e. not just summary)")
    parser.add_argument('-s', '--suppress', action='store_true', help="Don't show interesting fields")
    parser.add_argument('-i', '--individual', type=str, help="Only show data for a single repo")
    parser.add_argument('-c', '--csv', action='store_true', help="Output in csv")
    parser.add_argument('-o', '--org', action='store', help='Print the latest statistics for a different org', default='Seagate')
    # parser.add_argument('-k', '--key', type=str, help="Show all values for all dates for a single key")  # TODO: would be nice to add this one
    args = parser.parse_args()

    ps = cortx_community.PersistentStats(org_name=args.org)
    repos = sorted([repo for repo in ps.get_repos() if repo != 'GLOBAL'])

    # just use the global stats from the scrape
    (gstats, timestamp) = ps.get_latest('GLOBAL')

    if args.individual:
        (repo, timestamp) = ps.get_latest(args.individual)
        ps.print_repo(args.individual, repo, timestamp, verbose=args.verbose, csv=args.csv)
        if not args.suppress:
            print_interesting_arrays(args.individual, repo)
    else:
        ps.print_repo('GLOBAL', gstats, timestamp, verbose=args.verbose, csv=args.csv)
        if not args.suppress:
            print_interesting_arrays('GLOBAL', gstats)
        if args.all:
            for repo in repos:
                (rstats, timestamp) = ps.get_latest(repo)
                ps.print_repo(repo, rstats, timestamp, verbose=args.verbose, csv=args.csv)