def test():
    """Print the rolling and the direct statistics of a fixed sample.

    Appends every value of a hard-coded sample to a ``RollingStats``
    instance and prints the result of its ``get()``; then prints what
    ``calculate_stats`` returns for the same sample, so that the two
    outputs can be compared by eye.
    """
    indata = [0.1, 0.2, 0.3, 0.4, 0.4, 0.5, 0.1, 0.4]
    stats = RollingStats()
    for i in indata:
        stats.append(i)
    # Single-argument print(...) produces the same output on Python 2 and
    # Python 3, whereas the bare print statement is a SyntaxError on Python 3.
    print(stats.get())
    print(calculate_stats(indata))
def get_chi2(tasks, bad_users):
    """Perform chi^2 test on the stats from each worker.

    The win/loss/draw counts of all tasks are summed per worker and the
    resulting workers x result-types table is tested for homogeneity.

    Side effect: every task gets a ``'worker_key'`` entry.

    Args:
        tasks: iterable of task dicts.  Tasks without ``'worker_info'``,
            tasks whose worker key is in ``bad_users`` and tasks whose
            ``'stats'`` hold no wins, losses or draws are left out.
        bad_users: container of worker keys to exclude from the test.

    Returns:
        dict with the keys ``'chi2'``, ``'dof'``, ``'p'`` and ``'residual'``;
        ``'residual'`` maps each worker key to the largest absolute adjusted
        residual of that worker's row.  Degenerate inputs:

        * no usable task: chi2 0.0, dof 0, p 0.0, no residuals;
        * a single worker: chi2 and p are NaN, dof 0, no residuals;
        * a single result type: chi2 0.0, dof 0, p 1.0, no residuals.
    """
    results = {'chi2': 0.0, 'dof': 0, 'p': 0.0, 'residual': {}}

    # Aggregate results by worker
    users = {}
    for task in tasks:
        key = get_worker_key(task)
        task['worker_key'] = key
        if 'worker_info' not in task:
            continue
        if key in bad_users:
            continue
        stats = task.get('stats', {})
        wld = [float(stats.get('wins', 0)),
               float(stats.get('losses', 0)),
               float(stats.get('draws', 0))]
        if wld == [0.0, 0.0, 0.0]:
            continue
        if key in users:
            for idx, count in enumerate(wld):
                users[key][idx] += count
        else:
            users[key] = wld

    if not users:
        return results

    # Materialise keys and rows explicitly: on Python 3 dict views are not
    # sequences, so numpy.array(users.values()) would not build a 2-D table
    # and users.keys()[idx] would raise TypeError.
    worker_keys = list(users)
    observed = numpy.array([users[key] for key in worker_keys])
    rows = observed.shape[0]

    # Results only from one worker: there is nothing to compare against and
    # the adjusted residuals would be 0/0.
    if rows == 1:
        return {'chi2': float('nan'), 'dof': 0, 'p': float('nan'),
                'residual': {}}

    # Drop result types that nobody produced: their expected counts are zero
    # and would turn every statistic below into NaN (0/0).
    observed = observed[:, numpy.sum(observed, axis=0) > 0]
    columns = observed.shape[1]
    if columns == 0:
        return results
    # Degrees of freedom of the table that is actually tested.
    df = (rows - 1) * (columns - 1)
    # Results only of one type: the workers cannot differ.
    if columns == 1:
        return {'chi2': 0.0, 'dof': df, 'p': 1.0, 'residual': {}}

    column_sums = numpy.sum(observed, axis=0)
    row_sums = numpy.sum(observed, axis=1)
    grand_total = numpy.sum(column_sums)

    expected = numpy.outer(row_sums, column_sums) / grand_total
    diff = observed - expected
    # Adjusted residual: raw residual divided by its standard error.
    adj = numpy.outer(1 - row_sums / grand_total,
                      1 - column_sums / grand_total)
    residual = diff / numpy.sqrt(expected * adj)

    chi2 = numpy.sum(diff * diff / expected)
    return {
        'chi2': chi2,
        'dof': df,
        'p': 1 - scipy.stats.chi2.cdf(chi2, df),
        'residual': {
            key: numpy.max(numpy.abs(row))
            for key, row in zip(worker_keys, residual)
        },
    }
def calculate_residuals(run):
    """Annotate the tasks of ``run`` with residuals and pick the worst worker.

    Runs ``get_chi2`` over ``run['tasks']`` and stores on every task whose
    worker is not flagged as bad:

    * ``'residual'``: the worker's residual from the chi^2 result (0.0 when
      the worker has none), overridden with 8.0 when the task's crashes plus
      time losses exceed 3;
    * ``'residual_color'``: ``'#44EB44'`` below 2.0, ``'yellow'`` below 2.7,
      ``'#FF6A6A'`` otherwise (compared on the absolute residual).

    When the chi^2 p-value is below 0.01, or a task's residual is above 7.0,
    the worker with the largest residual is added to the bad users.

    Returns:
        The dict returned by the first ``get_chi2`` call, extended with a
        ``'bad_users'`` set.
    """
    tasks = run['tasks']
    bad_users = set()
    chi2 = get_chi2(tasks, bad_users)
    residuals = chi2['residual']

    # Limit bad users to 1 for now
    for _ in range(1):
        worst_user = {}
        for task in tasks:
            worker_key = task['worker_key']
            if worker_key in bad_users:
                continue

            task['residual'] = residuals.get(worker_key, 0.0)

            # Special case crashes or time losses
            stats = task.get('stats', {})
            if stats.get('crashes', 0) + stats.get('time_losses', 0) > 3:
                task['residual'] = 8.0

            residual = task['residual']
            magnitude = abs(residual)
            if magnitude < 2.0:
                color = '#44EB44'
            elif magnitude < 2.7:
                color = 'yellow'
            else:
                color = '#FF6A6A'
            task['residual_color'] = color

            suspicious = chi2['p'] < 0.01 or residual > 7.0
            if suspicious and (not worst_user
                               or residual > worst_user['residual']):
                worst_user = {'worker_key': worker_key, 'residual': residual}

        if not worst_user:
            break
        bad_users.add(worst_user['worker_key'])
        # Recompute the residuals without the worker that was just flagged.
        residuals = get_chi2(tasks, bad_users)['residual']

    chi2['bad_users'] = bad_users
    return chi2
def get_chi2(tasks, bad_users):
    """Perform chi^2 test on the stats from each worker.

    The win/loss/draw counts of all tasks are summed per worker and the
    resulting workers x result-types table is tested for homogeneity.

    Side effect: every task gets a ``"worker_key"`` entry.

    Args:
        tasks: iterable of task dicts.  Tasks without ``"worker_info"``,
            tasks whose worker key is in ``bad_users`` and tasks whose
            ``"stats"`` hold no wins, losses or draws are left out.
        bad_users: container of worker keys to exclude from the test.

    Returns:
        dict with the keys ``"chi2"``, ``"dof"``, ``"p"`` and ``"residual"``;
        ``"residual"`` maps each worker key to the largest absolute adjusted
        residual of that worker's row.  ``"dof"`` is
        ``(workers - 1) * (result types that occurred - 1)``.  Degenerate
        inputs:

        * no usable task: chi2 0.0, dof 0, p 0.0, no residuals;
        * a single worker: chi2 and p are NaN, dof 0, no residuals;
        * a single result type: chi2 0.0, dof 0, p 1.0, no residuals.
    """
    results = {"chi2": 0.0, "dof": 0, "p": 0.0, "residual": {}}

    # Aggregate results by worker
    users = {}
    for task in tasks:
        key = get_worker_key(task)
        task["worker_key"] = key
        if "worker_info" not in task:
            continue
        if key in bad_users:
            continue
        stats = task.get("stats", {})
        wld = [
            float(stats.get("wins", 0)),
            float(stats.get("losses", 0)),
            float(stats.get("draws", 0)),
        ]
        if wld == [0.0, 0.0, 0.0]:
            continue
        if key in users:
            for idx, count in enumerate(wld):
                users[key][idx] += count
        else:
            users[key] = wld

    if not users:
        return results

    worker_keys = list(users)
    observed = numpy.array(list(users.values()))
    rows = observed.shape[0]

    # Results only from one worker: skip the test for workers homogeneity
    if rows == 1:
        return {"chi2": float("nan"), "dof": 0, "p": float("nan"), "residual": {}}

    column_sums = numpy.sum(observed, axis=0)
    has_results = column_sums > 0
    columns_not_zero = int(numpy.sum(has_results))
    if columns_not_zero == 0:
        return results

    # Degrees of freedom of the table that is actually tested: a result type
    # that never occurred is removed below and must not be counted, otherwise
    # dof is overstated and the p-value comes out too large.
    df = (rows - 1) * (columns_not_zero - 1)

    # Results only of one type: workers are identical wrt the test
    if columns_not_zero == 1:
        return {"chi2": 0.0, "dof": df, "p": 1.0, "residual": {}}

    # Results only of two types: workers are identical wrt the missing result
    # type. Change the data shape to avoid divide by zero
    if columns_not_zero == 2:
        observed = observed[:, has_results]
        column_sums = column_sums[has_results]

    row_sums = numpy.sum(observed, axis=1)
    grand_total = numpy.sum(column_sums)

    expected = numpy.outer(row_sums, column_sums) / grand_total
    raw_residual = observed - expected
    std_error = numpy.sqrt(
        expected
        * numpy.outer((1 - row_sums / grand_total), (1 - column_sums / grand_total))
    )
    adj_residual = raw_residual / std_error

    chi2 = numpy.sum(raw_residual * raw_residual / expected)
    return {
        "chi2": chi2,
        "dof": df,
        "p": 1 - scipy.stats.chi2.cdf(chi2, df),
        # One pass over keys and rows instead of rebuilding the key list for
        # every worker.
        "residual": {
            key: numpy.max(numpy.abs(row))
            for key, row in zip(worker_keys, adj_residual)
        },
    }
def get_chi2(tasks, bad_users):
    """Perform chi^2 test on the stats from each worker.

    The results of all tasks are summed per worker into three counts and the
    resulting workers x result-types table is tested for homogeneity.  The
    counts come from ``stats["pentanomial"]`` when the first task with
    recorded stats has that key, and from ``"wins"``/``"losses"``/``"draws"``
    otherwise.

    Side effect: every task gets a ``"worker_key"`` entry.

    Args:
        tasks: iterable of task dicts.  Tasks without ``"worker_info"``,
            tasks whose worker key is in ``bad_users`` and tasks without
            any recorded result are left out.
        bad_users: container of worker keys to exclude from the test.

    Returns:
        dict with the keys ``"chi2"``, ``"dof"``, ``"p"`` and ``"residual"``;
        ``"residual"`` maps each worker key to the largest absolute adjusted
        residual of that worker's row.  ``"dof"`` is
        ``(workers - 1) * (result types that occurred - 1)``.  Degenerate
        inputs:

        * no usable task: chi2 0.0, dof 0, p 0.0, no residuals;
        * a single worker: chi2 and p are NaN, dof 0, no residuals;
        * a single result type: chi2 0.0, dof 0, p 1.0, no residuals.
    """
    results = {"chi2": 0.0, "dof": 0, "p": 0.0, "residual": {}}

    # Aggregate results by worker
    users = {}
    has_pentanomial = None
    for task in tasks:
        key = get_worker_key(task)
        task["worker_key"] = key
        if "worker_info" not in task:
            continue
        if key in bad_users:
            continue
        stats = task.get("stats", {})
        if not stats:
            # Nothing recorded: the task adds no counts, and it must not be
            # the one that decides which stats format the run uses.
            continue
        if has_pentanomial is None:
            has_pentanomial = "pentanomial" in stats
        if has_pentanomial:
            p = stats.get("pentanomial")
            if p is None:
                # Tolerate a missing entry the same way the trinomial branch
                # tolerates missing "wins"/"losses"/"draws".
                continue
            # The ww and ll frequencies will typically be too small for
            # the full pentanomial chi2 test to be valid. See e.g. the last page of
            # https://www.open.ac.uk/socialsciences/spsstutorial/files/tutorials/chi-square.pdf.
            # So we combine the ww and ll frequencies with the wd and ld frequencies.
            wld = [float(p[4] + p[3]), float(p[0] + p[1]), float(p[2])]
        else:
            wld = [
                float(stats.get("wins", 0)),
                float(stats.get("losses", 0)),
                float(stats.get("draws", 0)),
            ]
        if wld == [0.0, 0.0, 0.0]:
            continue
        if key in users:
            for idx, count in enumerate(wld):
                users[key][idx] += count
        else:
            users[key] = wld

    if not users:
        return results

    worker_keys = list(users)
    observed = numpy.array(list(users.values()))
    rows = observed.shape[0]

    # Results only from one worker: skip the test for workers homogeneity
    if rows == 1:
        return {
            "chi2": float("nan"),
            "dof": 0,
            "p": float("nan"),
            "residual": {},
        }

    column_sums = numpy.sum(observed, axis=0)
    has_results = column_sums != 0
    columns_not_zero = int(numpy.count_nonzero(has_results))
    if columns_not_zero == 0:
        return results

    # Degrees of freedom of the table that is actually tested: a result type
    # that never occurred is removed below and must not be counted, otherwise
    # dof is overstated and the p-value comes out too large.
    df = (rows - 1) * (columns_not_zero - 1)

    # Results only of one type: workers are homogeneous
    if columns_not_zero == 1:
        return {"chi2": 0.0, "dof": df, "p": 1.0, "residual": {}}

    # Results only of two types: drop the column of zeros to avoid divide by zero
    if columns_not_zero == 2:
        observed = observed[:, has_results]
        column_sums = column_sums[has_results]

    row_sums = numpy.sum(observed, axis=1)
    grand_total = numpy.sum(column_sums)

    expected = numpy.outer(row_sums, column_sums) / grand_total
    raw_residual = observed - expected
    std_error = numpy.sqrt(expected * numpy.outer(
        (1 - row_sums / grand_total), (1 - column_sums / grand_total)))
    adj_residual = raw_residual / std_error

    chi2 = numpy.sum(raw_residual * raw_residual / expected)
    return {
        "chi2": chi2,
        "dof": df,
        "p": 1 - scipy.stats.chi2.cdf(chi2, df),
        # One pass over keys and rows instead of rebuilding the key list for
        # every worker.
        "residual": {
            key: numpy.max(numpy.abs(row))
            for key, row in zip(worker_keys, adj_residual)
        },
    }