Example #1
import cortx_community as cc   # assumed import: the body calls cc.get_repos

def get_all_repos(gh, orgs, Verbose=False):
  """Return a {project: [repos]} map for each (org_name, repo_prefix) in orgs."""
  repos = {}
  for project, org in orgs.items():
    org_name, repo_prefix = org
    repos[project] = cc.get_repos(gh=gh, org_name=org_name, prefix=repo_prefix)
  if Verbose:
    for p,r in repos.items():
      print(p, [k.name for k in r])
  return repos
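
A minimal usage sketch, assuming a PyGithub client and that orgs maps each project name to an (org_name, repo_prefix) pair, which is the shape the loop above unpacks. The token and org values below are placeholders, not values from the original code:

from github import Github

gh    = Github('<github-token>')                 # placeholder credentials
orgs  = { 'cortx' : ('Seagate', 'cortx') }       # hypothetical project -> (org_name, repo_prefix)
repos = get_all_repos(gh, orgs, Verbose=True)    # prints each project and its repo names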
Example #2
import copy
from datetime import datetime

import cortx_community

def collect_stats(gh, org_name, update, prefix, top_only):
  avoid_rate_limiting(gh)
  today = datetime.today().strftime('%Y-%m-%d')

  # populate our persistent data structures from the pickles
  people = cortx_community.CortxCommunity(org_name)             
  author_activity = cortx_community.CortxActivity(org_name)     
  persistent_stats = cortx_community.PersistentStats(org_name)  

  # averages cannot simply be summed across repos: the *_ave_age_in_s fields
  # accumulate total ages here and are divided by their counts at the end
  ave_age_str='_ave_age_in_s'

  # the shared structure that we use for collecting stats
  global_stats = { 'branches'                      : 0, 
                   'clones_count_14_days'          : 0,
                   'clones_unique_14_days'         : 0,
                   'comments'                      : 0,
                   'commits'                       : 0, 
                   'companies_contributing'        : set(),
                   'companies'                     : set(), 
                   'contributors'                  : set(), 
                   'domains'                       : set(), 
                   'downloads_releases'            : 0,
                   'downloads_vms'                 : 0,
                   'email_addresses'               : set(), 
                   'external_comments'             : 0,
                   'external_email_addresses'      : set(),
                   'forks_external'                : set(),
                   'forks'                         : set(),
                   'logins'                        : set(), 
                   'new_external_activities'       : set(),
                   'new_logins'                    : set(),
                   'pull_requests_external_merged' : 0,
                   'pull_requests_internal_merged' : 0,
                   'pull_requests_merged'          : 0,
                   'seagate_blog_referrer_count'   : 0,
                   'seagate_blog_referrer_uniques' : 0,
                   'seagate_referrer_count'        : 0,
                   'seagate_referrer_uniques'      : 0,
                   'stars_external'                : set(),
                   'stars'                         : set(),
                   'top_paths'                     : [], 
                   'top_referrers'                 : [],
                   'views_count_14_days'           : 0,
                   'views_unique_14_days'          : 0,
                   'watchers_external'             : set(),
                   'watchers'                      : set(),
                   }
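  # load_actors presumably seeds the actor-related sets above from the pickled
  # community data; load_items adds one counter per (item x scope x state)
  # combination, producing keys such as 'issues_external_open' and
  # 'pull_requests_closed_ave_age_in_s'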
  load_actors(global_stats,people)
  load_items(global_stats,
             ('issues','pull_requests'),
             ('_external','_internal',''),
             ('','_open','_closed','_open_ave_age_in_s','_closed_ave_age_in_s'))
  local_stats_template = copy.deepcopy(global_stats)    # save an empty copy of the stats struct to copy for each repo

  for repo in cortx_community.get_repos(org_name=org_name,prefix=prefix): 
    while True: # retry loop: the GitHub API fails intermittently (e.g. rate limits), so keep retrying this repo until it succeeds
      try:
        local_stats = copy.deepcopy(local_stats_template) # get an empty copy of the stats structure
        rname=repo.name # just in case this requires a github API call, fetch it once and reuse it

        # Use update if you just want to add some new data and don't want to wait for
        # the very slow scrape of all activity.  Once the update is finished, migrate
        # the code out of the update block.  Typically update is only used during
        # development.  Note that update does not work for values that are incremented.
        if update:
          (cached_local_stats,timestamp) = persistent_stats.get_latest(rname)  # load the cached version
          print("Fetched %s data for %s" % (timestamp, repo))
          for k,v in cached_local_stats.items():
            local_stats[k] = v
        else:
          get_top_level_repo_info(local_stats,repo,people=people,author_activity=author_activity,gh=gh,org_name=org_name)
          get_contributors(rname,repo,local_stats,people=people,gh=gh,org_name=org_name)
          if not top_only:
            get_issues_and_prs(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh,org_name=org_name)
            get_commits(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh,org_name=org_name)

        # TODO: query when this last ran and pass 'since' to get_commits so old commits aren't rescanned

        # summarize info for this repo and persist the data structures
        summarize_consolidate(local_stats,global_stats,people=people,author_activity=author_activity,ave_age_str=ave_age_str)
        persist_author_activity(author_activity)
        persistent_stats.add_stats(date=today,repo=rname,stats=local_stats)
        persistent_stats.print_repo(rname,local_stats,date=today,verbose=False,csv=False)
        break
      except Exception as e:
        print("WTF: Failed while getting stats for repo %s" % repo.name, e)
        avoid_rate_limiting(gh,Verbose=True)

  # do a bit of cleaning on global stats
  # print and persist the global consolidated stats

  # treat the 'ave_age_in_s' fields differently 
  # all those fields have consistent names: 'x_ave_age_in_s'
  # also, there will always be a corresponding field x which is the count
  for ave_age in [key for key in global_stats if key.endswith(ave_age_str)]:
    item = ave_age[:-len(ave_age_str)]   # the matching count field, e.g. 'issues_open'
    try:
      global_stats[ave_age] /= global_stats[item]
    except ZeroDivisionError:
      global_stats[ave_age] = 0

  global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

  persistent_stats.print_repo('GLOBAL',global_stats,date=today,verbose=False,csv=False)
  persistent_stats.add_stats(date=today,repo='GLOBAL',stats=global_stats)
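
To make the average-age cleanup concrete, here is a standalone sketch with made-up numbers; the field names simply follow the 'x' / 'x_ave_age_in_s' convention the loop above relies on:

stats = {
  'issues_open'                : 4,          # count field 'x'
  'issues_open_ave_age_in_s'   : 400000.0,   # total ages accumulated across repos
  'issues_closed'              : 0,
  'issues_closed_ave_age_in_s' : 0.0,
}
suffix = '_ave_age_in_s'
for key in [k for k in stats if k.endswith(suffix)]:
  count = stats[key[:-len(suffix)]]
  try:
    stats[key] /= count     # 400000.0 / 4 -> 100000.0 seconds average
  except ZeroDivisionError:
    stats[key] = 0          # no items at all, so define the average as 0
print(stats['issues_open_ave_age_in_s'])    # 100000.0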