Example #1
def get_stats():

    stats = {}

    # load up our persistent pickles (activity, people, Slack community)
    author_activity = cc.CortxActivity()
    all_people = cc.CortxCommunity()
    slack_people = cc.SlackCommunity()

    # init web client
    client = get_client()

    print("Getting channels")
    channels = get_channels(client, limit=None)
    for cname in channels.values():
        stats[cname] = {}
    stats[GLOBAL] = {}
    print(stats)

    print("Joining channels")
    join_channels(client, channels)

    print("Getting member lists")
    (all_members, active_members) = get_members(client, all_people,
                                                slack_people)
    print("%d members, %d active members" %
          (len(all_members), len(active_members)))
    stats[GLOBAL]['slack_members'] = all_members
    stats[GLOBAL]['slack_active_members'] = active_members

    print("Getting talkers lists")
    (all_talkers, weekly_talkers) = get_conversations(client, channels,
                                                      slack_people,
                                                      author_activity, stats)
    for cname in channels.values():
        stats[cname]['slack_participants'] = all_talkers[cname]
        stats[cname]['slack_weekly_participants'] = weekly_talkers[cname]
    stats[GLOBAL]['slack_participants'] = all_talkers[GLOBAL]
    stats[GLOBAL]['slack_weekly_participants'] = weekly_talkers[GLOBAL]
    print(stats)

    print("Getting member counts")
    member_counts = get_member_count(client, channels)
    print(member_counts)
    for cname in channels.values():
        stats[cname]['slack_member_count'] = member_counts[cname]
    stats[GLOBAL]['slack_member_count'] = member_counts[GLOBAL]

    return stats
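
get_stats leans on several helpers that are not shown in this example (get_client, get_channels, join_channels, get_members, get_conversations, get_member_count) plus a GLOBAL key used for org-wide totals. A minimal sketch of what the client and channel map could look like, assuming the slack_sdk package and a SLACK_BOT_TOKEN environment variable (both names are assumptions, not taken from the original code):

import os
from slack_sdk import WebClient

GLOBAL = 'GLOBAL'  # hypothetical key used for org-wide totals

def get_client():
    # hypothetical: build a Slack Web API client from a bot token in the environment
    return WebClient(token=os.environ['SLACK_BOT_TOKEN'])

def get_channels(client, limit=None):
    # hypothetical: map channel id -> channel name, paging through conversations.list
    channels = {}
    cursor = None
    while True:
        resp = client.conversations_list(cursor=cursor, limit=200)
        for ch in resp['channels']:
            channels[ch['id']] = ch['name']
        cursor = resp['response_metadata'].get('next_cursor')
        if not cursor or (limit is not None and len(channels) >= limit):
            break
    return channels

The real helpers presumably also page through member lists and message history; the sketch only covers the channel map that get_stats keys its per-channel stats on.
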
Example #2
def get_activities(logins, company, people):
    activity = cortx_community.CortxActivity()
    activities = {}
    logins = get_logins(logins, people, company)
    for login in logins:
        activities[login] = {}
        try:
            # build a new structure that holds the data keyed in a way that is easier to sort:
            # walk the recorded activity and save each item into the new format
            # one problem: watch events don't carry a date . . .
            for (url, created_at) in activity.get_activities(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass
            #print("Login %s has no observed activity" % login)
    return (activities, logins)
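
get_activities (and the main() drivers further down) delegate login expansion to get_logins, which is not shown. Below is a rough sketch of the wildcard expansion that the later help strings describe, assuming CortxCommunity can be iterated over its known logins and exposes get_type and get_company accessors (the iteration and get_company are assumptions; get_type does appear in Example #4):

def get_logins(login_arg, people, company=False):
    # hypothetical: expand a comma-separated LOGIN argument; community types such as
    # 'External' or 'All' act as wildcards that pull in every matching login
    wildcards = ('External', 'Hackathon', 'EU R&D', 'Innersource', 'All', 'Unknown')
    logins = set()
    for token in login_arg.split(','):
        if company:
            # assumed accessor: treat the token as a company name
            logins.update(l for l in people if people.get_company(l) == token)
        elif token in wildcards:
            logins.update(l for l in people
                          if token == 'All' or people.get_type(l) == token)
        else:
            logins.add(token)
    return sorted(logins)
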
Example #3
def collect_stats(update):
  gh = Github(os.environ.get('GH_OATH'))
  avoid_rate_limiting(gh)
  stx = gh.get_organization('Seagate')
  today = datetime.today().strftime('%Y-%m-%d')

  # averages are weird so handle them differently
  ave_age_str='_ave_age_in_s'

  # the shared structure that we use for collecting stats
  global_stats = { 'branches'                      : 0, 
                   'clones_count_14_days'          : 0,
                   'clones_unique_14_days'         : 0,
                   'comments'                      : 0,
                   'commits'                       : 0, 
                   'companies_contributing'        : set(),
                   'companies'                     : set(), 
                   'contributors'                  : set(), 
                   'domains'                       : set(), 
                   'downloads_releases'            : 0,
                   'downloads_vms'                 : 0,
                   'email_addresses'               : set(), 
                   'external_comments'             : 0,
                   'external_email_addresses'      : set(),
                   'forks_external'                : set(),
                   'forks'                         : set(),
                   'logins'                        : set(), 
                   'new_external_activities'       : set(),
                   'new_logins'                    : set(),
                   'pull_requests_external_merged' : 0,
                   'pull_requests_internal_merged' : 0,
                   'pull_requests_merged'          : 0,
                   'seagate_blog_referrer_count'   : 0,
                   'seagate_blog_referrer_uniques' : 0,
                   'seagate_referrer_count'        : 0,
                   'seagate_referrer_uniques'      : 0,
                   'stars_external'                : set(),
                   'stars'                         : set(),
                   'top_paths'                     : [], 
                   'top_referrers'                 : [],
                   'views_count_14_days'           : 0,
                   'views_unique_14_days'          : 0,
                   'watchers_external'             : set(),
                   'watchers'                      : set(),
                    }
  load_actors(global_stats,('mannequin','innersource','external','hackathon','bot','cortx_team','unknown'))
  load_items(global_stats,('issues','pull_requests'),('_external','_internal',''),('','_open','_closed','_open_ave_age_in_s','_closed_ave_age_in_s'))
  global_stats['pull_requests_external_merged'] = 0
  local_stats_template = copy.deepcopy(global_stats)    # save an empty copy of the stats struct to copy for each repo
  author_activity = cortx_community.CortxActivity()     # load up the author activity pickle 
  people = cortx_community.CortxCommunity()             # load up the people pickle
  persistent_stats = cortx_community.PersistentStats()  # load up all the stats

  for repo in stx.get_repos():
    rname = repo.name # cache this in a variable in case the attribute access triggers a GitHub API call
    if 'cortx' not in rname or rname.endswith('.old') or rname.endswith('-old') or repo.private:
      continue

    local_stats = copy.deepcopy(local_stats_template) # get an empty copy of the stats structure

    # Use update if you only want to add some new data and don't want to wait for the very slow
    # scrape of all activity.  Once you have finished the update, migrate the code out of the update block.
    # Typically we don't use update; only during development.
    # Note that update doesn't work for values that are incremented . . .
    if update:
      (cached_local_stats,timestamp) = persistent_stats.get_latest(rname)  # load the cached version
      print("Fetched %s data for %s" % (timestamp, repo))
      for k,v in cached_local_stats.items():
        local_stats[k] = v
    else:
      get_top_level_repo_info(local_stats,repo,people=people,author_activity=author_activity,gh=gh,)
      get_issues_and_prs(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh)
      get_commits(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh)
      get_contributors(rname,repo,local_stats,people=people,gh=gh)

    # what we should do is query when this last ran and then pass 'since' to get_commits

    # summarize info for this repo and persist the data structures
    summarize_consolidate(local_stats,global_stats,people=people,author_activity=author_activity,ave_age_str=ave_age_str)
    persist_author_activity(author_activity)
    persistent_stats.add_stats(date=today,repo=rname,stats=local_stats)
    persistent_stats.print_repo(rname,local_stats,date=today,verbose=False,csv=False)

  # do a bit of cleaning on global stats
  # print and persist the global consolidated stats

  # treat the 'ave_age_in_s' fields differently 
  # all those fields have consistent names: 'x_ave_age_in_s'
  # also, there will always be a corresponding field x which is the count
  for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
    item  = ave_age[0:len(ave_age)-len(ave_age_str)]
    try:
      global_stats[ave_age] /= global_stats[item]
    except ZeroDivisionError:
      global_stats[ave_age] = 0

  global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

  persistent_stats.print_repo('GLOBAL',global_stats,date=today,verbose=False,csv=False)
  persistent_stats.add_stats(date=today,repo='GLOBAL',stats=global_stats)
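
Both collect_stats variants call avoid_rate_limiting before walking the API (Example #6 also calls it after a failure). Its body is not shown; a plausible sketch using PyGithub's rate-limit endpoint follows, where the threshold and sleep strategy are assumptions and the Verbose keyword mirrors the call in Example #6:

import time
from datetime import datetime, timezone

def avoid_rate_limiting(gh, threshold=100, Verbose=False):
    # hypothetical: if the core rate limit is nearly exhausted, sleep until it resets
    core = gh.get_rate_limit().core
    if Verbose:
        print("Rate limit: %d/%d remaining, resets at %s"
              % (core.remaining, core.limit, core.reset))
    if core.remaining < threshold:
        reset = core.reset
        if reset.tzinfo is None:  # older PyGithub returns naive UTC datetimes
            reset = reset.replace(tzinfo=timezone.utc)
        wait = (reset - datetime.now(timezone.utc)).total_seconds() + 10
        if wait > 0:
            print("Sleeping %.0f seconds to stay under the GitHub rate limit" % wait)
            time.sleep(wait)
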
Example #4
def main():
    parser = argparse.ArgumentParser(
        description='Retrieve all activity done by a particular user.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'login',
        metavar='LOGIN',
        type=str,
        help=
        "Comma-separate lists of logins [can use External,Hackathon,EU R&D,Innersource,All,Unknown as wildcards]"
    )
    parser.add_argument('-s',
                        '--since',
                        type=str,
                        help="Only show activity since yyyy-mm-dd")
    parser.add_argument('-u',
                        '--until',
                        type=str,
                        help="Only show activity until yyyy-mm-dd")
    parser.add_argument('-w',
                        '--last_week',
                        action='store_true',
                        help="Only show activity in the last seven days")
    parser.add_argument('-m',
                        '--last_month',
                        action='store_true',
                        help="Only show activity in the last 30 days")
    parser.add_argument(
        '-d',
        '--details',
        action='store_true',
        help="Print stats for pulls and commits, also reports a total score")
    parser.add_argument(
        '-c',
        '--company',
        action='store_true',
        help=
        "Instead of looking up an individual, look up all folks from a particular company"
    )
    parser.add_argument('-l',
                        '--limit',
                        type=int,
                        help="Only show actions if gte to limit")
    parser.add_argument('-z',
                        '--zero',
                        action='store_true',
                        help="Show folks even if they have no actions")
    args = parser.parse_args()

    activity = cortx_community.CortxActivity()
    people = cortx_community.CortxCommunity()

    if args.since:
        args.since = dateparser.parse(args.since)
    if args.until:
        args.until = dateparser.parse(args.until)
    if args.last_week:
        args.since = datetime.datetime.today() - datetime.timedelta(days=7)
    if args.last_month:
        args.since = datetime.datetime.today() - datetime.timedelta(days=30)
    daterange = "since %s" % (args.since.strftime('%Y-%m-%d')
                              if args.since else "inception")
    if args.until:
        daterange += " until %s" % args.until.strftime('%Y-%m-%d')

    if args.details:
        gh = Github(os.environ.get('GH_OATH'))
        stx = gh.get_organization('Seagate')

    activities = {}
    logins = get_logins(args.login, people, args.company)
    for login in logins:
        activities[login] = {}
        try:
            # build a new structure that holds the data keyed in a way that is easier to sort:
            # walk the recorded activity and save each item into the new format
            # one problem: watch events don't carry a date . . .
            for (url, created_at) in activity.get_activities(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass
            #print("Login %s has no observed activity" % login)

    # using the new data structure, filter by since and until
    filtered_activities = {}
    if args.since or args.until:
        for login, actions in sorted(activities.items()):
            filtered_activities[login] = {}
            for d, u in sorted(actions.items()):
                if args.since and d < args.since:
                    continue
                if args.until and d > args.until:
                    continue
                filtered_activities[login][d] = u
    else:
        filtered_activities = activities

    # optionally filter by limit
    if args.limit:
        new_filtered = {}
        for login, actions in sorted(filtered_activities.items()):
            if len(actions) >= args.limit:
                new_filtered[login] = actions
        filtered_activities = new_filtered

    if len(logins) > 1:
        print("Getting activities from %d logins: %s" %
              (len(logins), sorted(logins)))

    # now print from the filtered list
    total_actions = 0
    for k in sorted(filtered_activities,
                    key=lambda k: len(filtered_activities[k]),
                    reverse=True):
        login = k
        actions = filtered_activities[k]
        #for login,actions in sorted(filtered_activities.items()):
        try:
            email = people.get_email(login)
            Type = people.get_type(login)
        except KeyError:
            email = None
            Type = None
        total_score = 0
        if len(actions) > 0 or args.zero:
            print("%d actions for %s [email %s, Type %s] %s" %
                  (len(actions), login, email, Type, daterange))
            total_actions += len(actions)
        for d, u in sorted(actions.items()):
            if args.details:
                (points, details) = get_details(u, stx)
                total_score += points
            print("\t-- %s %s %s %s" %
                  (login, d, u, details if args.details else ''))
        if len(actions) > 0 and args.details:
            print("\t%4.1f POINTS for %s" % (total_score, login))

    print("SUMMARY: %d total observed actions from %s %s" %
          (total_actions, args.login, daterange))
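
One subtlety in the since/until filtering above: the check 'd < args.since' compares the created_at keys directly against the datetime that dateparser produced, so it only behaves if CortxActivity stores created_at as datetime objects as well (an assumption here); mixing datetimes with date strings would raise a TypeError. For reference, the parsing step produces:

import datetime
import dateparser

since = dateparser.parse('2021-06-01')               # datetime.datetime(2021, 6, 1, 0, 0)
created_at = datetime.datetime(2021, 7, 15, 9, 30)   # what CortxActivity is assumed to store
print(created_at < since)                            # False, so this action is kept
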
Example #5
def main():
    parser = argparse.ArgumentParser(
        description='Retrieve all activity done by a particular user.',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument(
        'login',
        metavar='LOGIN',
        type=str,
        help=
        "Comma-separate lists of logins [can use External,Hackathon,Innersource,All,Unknown as wildcards]"
    )
    parser.add_argument('-s',
                        '--since',
                        type=str,
                        help="Only show activity since yyyy-mm-dd")
    parser.add_argument('-u',
                        '--until',
                        type=str,
                        help="Only show activity until yyyy-mm-dd")
    parser.add_argument('-l',
                        '--last_week',
                        action='store_true',
                        help="Only show activity in the last seven days")
    parser.add_argument(
        '-d',
        '--details',
        action='store_true',
        help="Print stats for pulls and commits, also reports a total score")
    args = parser.parse_args()

    activity = cortx_community.CortxActivity()

    if args.since:
        args.since = dateparser.parse(args.since)
    if args.until:
        args.until = dateparser.parse(args.until)
    if args.last_week:
        args.since = datetime.datetime.today() - datetime.timedelta(days=7)
    daterange = "since %s" % (args.since.strftime('%Y-%m-%d')
                              if args.since else "inception")
    if args.until:
        daterange += " until %s" % args.until.strftime('%Y-%m-%d')

    if args.details:
        gh = Github(os.environ.get('GH_OATH'))
        stx = gh.get_organization('Seagate')

    activities = {}
    logins = get_logins(args.login)
    for login in logins:
        activities[login] = {}
        try:
            # build a new structure that holds the data keyed in a way that is easier to sort:
            # walk the recorded activity and save each item into the new format
            # one problem: watch events don't carry a date . . .
            for (url, created_at) in activity.get_activity(login):
                if created_at is not None:  # just don't count watch events since they don't have a date
                    activities[login][created_at] = url
        except KeyError:
            pass
            #print("Login %s has no observed activity" % login)

    # using the new data structure, filter by since and until
    filtered_activities = {}
    if args.since or args.until:
        for login, actions in sorted(activities.items()):
            filtered_activities[login] = {}
            for d, u in sorted(actions.items()):
                if args.since and d < args.since:
                    continue
                if args.until and d > args.until:
                    continue
                filtered_activities[login][d] = u
    else:
        filtered_activities = activities

    # now print from the filtered list
    total_actions = 0
    for login, actions in sorted(filtered_activities.items()):
        total_score = 0
        if len(actions) > 0:
            print("%d actions for %s %s" % (len(actions), login, daterange))
            total_actions += len(actions)
        for d, u in sorted(actions.items()):
            if args.details:
                (points, details) = get_details(u, stx)
                total_score += points
            print("\t%s %s %s %s" %
                  (login, d, u, details if args.details else ''))
        if len(actions) > 0 and args.details:
            print("\t%4.1f POINTS for %s" % (total_score, login))

    print("SUMMARY: %d total observed actions from %s %s" %
          (total_actions, args.login, daterange))
Example #6
def collect_stats(gh,org_name,update,prefix,top_only):
  avoid_rate_limiting(gh)
  today = datetime.today().strftime('%Y-%m-%d')

  # populate our persistent data structures from the pickles
  people = cortx_community.CortxCommunity(org_name)             
  author_activity = cortx_community.CortxActivity(org_name)     
  persistent_stats = cortx_community.PersistentStats(org_name)  

  # averages are weird so handle them differently
  ave_age_str='_ave_age_in_s'

  # the shared structure that we use for collecting stats
  global_stats = { 'branches'                      : 0, 
                   'clones_count_14_days'          : 0,
                   'clones_unique_14_days'         : 0,
                   'comments'                      : 0,
                   'commits'                       : 0, 
                   'companies_contributing'        : set(),
                   'companies'                     : set(), 
                   'contributors'                  : set(), 
                   'domains'                       : set(), 
                   'downloads_releases'            : 0,
                   'downloads_vms'                 : 0,
                   'email_addresses'               : set(), 
                   'external_comments'             : 0,
                   'external_email_addresses'      : set(),
                   'forks_external'                : set(),
                   'forks'                         : set(),
                   'logins'                        : set(), 
                   'new_external_activities'       : set(),
                   'new_logins'                    : set(),
                   'pull_requests_external_merged' : 0,
                   'pull_requests_internal_merged' : 0,
                   'pull_requests_merged'          : 0,
                   'seagate_blog_referrer_count'   : 0,
                   'seagate_blog_referrer_uniques' : 0,
                   'seagate_referrer_count'        : 0,
                   'seagate_referrer_uniques'      : 0,
                   'stars_external'                : set(),
                   'stars'                         : set(),
                   'top_paths'                     : [], 
                   'top_referrers'                 : [],
                   'views_count_14_days'           : 0,
                   'views_unique_14_days'          : 0,
                   'watchers_external'             : set(),
                   'watchers'                      : set(),
                    }
  load_actors(global_stats,people)
  load_items(global_stats,('issues','pull_requests'),('_external','_internal',''),('','_open','_closed','_open_ave_age_in_s','_closed_ave_age_in_s'))
  local_stats_template = copy.deepcopy(global_stats)    # save an empty copy of the stats struct to copy for each repo

  for repo in cortx_community.get_repos(org_name=org_name,prefix=prefix): 
    while True: # retry loop: the scrape fails often enough that retrying each repo until it succeeds is worthwhile
      try:
        local_stats = copy.deepcopy(local_stats_template) # get an empty copy of the stats structure
        rname=repo.name # just in case this requires a github API call, fetch it once and reuse it

        # Use update if you only want to add some new data and don't want to wait for the very slow
        # scrape of all activity.  Once you have finished the update, migrate the code out of the update block.
        # Typically we don't use update; only during development.
        # Note that update doesn't work for values that are incremented . . .
        if update:
          (cached_local_stats,timestamp) = persistent_stats.get_latest(rname)  # load the cached version
          print("Fetched %s data for %s" % (timestamp, repo))
          for k,v in cached_local_stats.items():
            local_stats[k] = v
        else:
          get_top_level_repo_info(local_stats,repo,people=people,author_activity=author_activity,gh=gh,org_name=org_name)
          get_contributors(rname,repo,local_stats,people=people,gh=gh,org_name=org_name)
          if not top_only:
            get_issues_and_prs(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh,org_name=org_name)
            get_commits(rname,repo,local_stats,people=people,author_activity=author_activity,gh=gh,org_name=org_name)

        # what we should do is query when this last ran and then pass 'since' to get_commits

        # summarize info for this repo and persist the data structures
        summarize_consolidate(local_stats,global_stats,people=people,author_activity=author_activity,ave_age_str=ave_age_str)
        persist_author_activity(author_activity)
        persistent_stats.add_stats(date=today,repo=rname,stats=local_stats)
        persistent_stats.print_repo(rname,local_stats,date=today,verbose=False,csv=False)
        break
      except Exception as e:
        print("WTF: Failed while getting stats for repo %s" % repo.name, e)
        avoid_rate_limiting(gh,Verbose=True)

  # do a bit of cleaning on global stats
  # print and persist the global consolidated stats

  # treat the 'ave_age_in_s' fields differently 
  # all those fields have consistent names: 'x_ave_age_in_s'
  # also, there will always be a corresponding field x which is the count
  for ave_age in [key for key in global_stats.keys() if ave_age_str in key]:
    item  = ave_age[0:len(ave_age)-len(ave_age_str)]
    try:
      global_stats[ave_age] /= global_stats[item]
    except ZeroDivisionError:
      global_stats[ave_age] = 0

  global_stats['top_referrers'] = consolidate_referrers(global_stats['top_referrers'])

  persistent_stats.print_repo('GLOBAL',global_stats,date=today,verbose=False,csv=False)
  persistent_stats.add_stats(date=today,repo='GLOBAL',stats=global_stats)
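
Example #6 moves repository discovery into cortx_community.get_repos, while Example #3 does the same filtering inline. A sketch of what that helper might look like with PyGithub; the signature and the GH_OATH token lookup are assumptions carried over from Example #3:

import os
from github import Github

def get_repos(org_name, prefix='cortx'):
    # hypothetical: yield public, non-retired repos whose names match the prefix,
    # mirroring the inline filter in Example #3
    gh = Github(os.environ.get('GH_OATH'))
    org = gh.get_organization(org_name)
    for repo in org.get_repos():
        name = repo.name
        if prefix in name and not name.endswith(('.old', '-old')) and not repo.private:
            yield repo
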