def local_1(args, computation_phase):
    """Compute the local regression results to be shared with the remote.

    NOTE(review): this definition is shadowed by a later ``local_1`` in
    this file; it is kept for reference but is dead code at import time.

    Args:
        args (dict): ``args['input']`` holds ``covariates`` (X),
            ``dependents`` (y) and ``lambda`` (regularization strength).
        computation_phase (str): label of the phase just completed,
            echoed back to the remote unchanged.

    Returns:
        str: JSON document with the local beta vector, local fit
        statistics (r^2, t, p), mean and count of the dependents, and a
        cache of the raw inputs for later phases.
    """
    input_list = args['input']
    X = input_list['covariates']
    y = input_list['dependents']
    lamb = input_list['lambda']

    # Prepend an intercept column of ones; used for the fit statistics.
    biased_X = np.insert(X, 0, 1, axis=1)

    # Step 1: generate the local beta vector.
    beta_vector = reg.one_shot_regression(X, y, lamb)

    # Step 2: generate the local fit statistics (r^2, t and p values).
    r_2 = reg.r_square(biased_X, y, beta_vector)
    ts_beta = reg.t_value(biased_X, y, beta_vector)
    dof = len(y) - len(beta_vector)
    # FIX: pass (t, dof) — the original passed (dof, ts_beta), which is
    # inconsistent with every other reg.t_to_p call site in this file
    # (remote_3, remote_4 and the later local_1/remote_2 versions).
    ps_beta = reg.t_to_p(ts_beta, dof)

    # Step 3: summary statistics of the dependents for global averaging.
    mean_y_local = np.mean(y)
    count_local = len(y)

    computation_output = json.dumps(
        {
            'output': {
                'beta_vector_local': beta_vector.tolist(),
                'r_2_local': r_2,
                'ts_local': ts_beta.tolist(),
                'ps_local': ps_beta,
                'mean_y_local': mean_y_local,
                'count_local': count_local,
                'computation_phase': computation_phase
            },
            'cache': {
                'covariates': X,
                'dependents': y,
                'lambda': lamb
            }
        },
        sort_keys=True,
        indent=4,
        separators=(',', ': '))
    return computation_output
def remote_2(args, computation_phase):
    """Compute the global model fit statistics (r_2, t, p).

    NOTE(review): this definition is shadowed by a later ``remote_2``
    (different signature) in this file; it is dead code at import time.

    Args:
        args (dict): ``args['input']`` is a list of per-site dicts, each
            with ``SSE_local``, ``SST_local`` and ``varX_matrix_local``;
            ``args['cache']`` holds earlier-phase results including
            ``avg_beta_vector`` and ``dof_global``.
        computation_phase (str): unused here; kept for interface parity
            with the local phases.

    Returns:
        str: JSON document with the global and cached local statistics
        and ``complete: True``.
    """
    cache_list = args['cache']
    input_list = args['input']
    avg_beta_vector = cache_list['avg_beta_vector']
    dof_global = cache_list['dof_global']

    SSE_global = sum(site['SSE_local'] for site in input_list)
    SST_global = sum(site['SST_local'] for site in input_list)
    # FIX: sum the per-site matrices elementwise. The original built
    # varX_matrix_global by Python list concatenation, which stacks rows
    # into a non-square matrix and makes np.linalg.inv fail whenever more
    # than one site reports (the later remote_2/remote_3 sum np arrays).
    varX_matrix_global = sum(
        np.array(site['varX_matrix_local']) for site in input_list)

    r_2_global = 1 - (SSE_global / SST_global)
    MSE = (1 / dof_global) * SSE_global
    var_beta_global = MSE * (np.linalg.inv(varX_matrix_global).diagonal())
    se_beta_global = np.sqrt(var_beta_global)
    ts_global = avg_beta_vector / se_beta_global
    # FIX: argument order (t, dof) for consistency with the rest of the
    # file's reg.t_to_p call sites.
    ps_global = reg.t_to_p(ts_global, dof_global)

    computation_output = json.dumps(
        {
            'output': {
                'avg_beta_vector': cache_list['avg_beta_vector'],
                'beta_vector_local': cache_list['beta_vector_local'],
                'r_2_global': r_2_global,
                # FIX: ts_global is a numpy array; json.dumps cannot
                # serialize it — convert to a plain list first.
                'ts_global': ts_global.tolist(),
                'ps_global': ps_global,
                'r_2_local': cache_list['r_2_local'],
                'ts_local': cache_list['ts_local'],
                'ps_local': cache_list['ps_local'],
                'dof_global': cache_list['dof_global'],
                'dof_local': cache_list['dof_local'],
                'complete': True
            }
        },
        sort_keys=True,
        indent=4,
        separators=(',', ': '))
    return computation_output
def remote_4(args):
    """Compute per-ROI global fit statistics and merge in local stats.

    Args:
        args (dictionary): {"input": {
                                <site>: {"SSE_local": ,
                                         "SST_local": ,
                                         "varX_matrix_local": ,
                                         "computation_phase": },
                                ...},
                            "cache": {"avg_beta_vector": ,
                                      "dof_global": ,
                                      "y_labels": ,
                                      "all_local_stats_dicts": }}

    Returns:
        computation_output (json): {"output": {"regressions": [
                                        {"ROI": ,
                                         "global_stats": ,
                                         "local_stats": }, ...]},
                                    "success": True}

    Comments:
        r^2 : goodness of fit / coefficient of determination,
              1 - (SSE/SST) where SSE = sum of squared errors and
              SST = total sum of squares.
        t   : coefficient divided by its standard error.
        p   : two-tailed p-value for the t statistic.
    """
    input_list = args["input"]
    y_labels = args["cache"]["y_labels"]
    all_local_stats_dicts = args["cache"]["all_local_stats_dicts"]
    cache_list = args["cache"]

    avg_beta_vector = cache_list["avg_beta_vector"]
    dof_global = cache_list["dof_global"]

    # Pool the per-site error sums and covariate cross-product matrices.
    SSE_global = sum(
        np.array(input_list[site]["SSE_local"]) for site in input_list)
    SST_global = sum(
        np.array(input_list[site]["SST_local"]) for site in input_list)
    varX_matrix_global = sum(
        np.array(input_list[site]["varX_matrix_local"])
        for site in input_list)

    r_squared_global = 1 - (SSE_global / SST_global)
    MSE = SSE_global / np.array(dof_global)

    # PERF: the matrix inverse does not depend on the ROI index — hoist
    # it out of the loop instead of re-inverting once per ROI.
    varX_inv_global = sp.linalg.inv(varX_matrix_global)

    ts_global = []
    ps_global = []
    for i in range(len(MSE)):
        # Variance-covariance of the betas for ROI i, then its std errors.
        var_covar_beta_global = MSE[i] * varX_inv_global
        se_beta_global = np.sqrt(var_covar_beta_global.diagonal())
        ts = avg_beta_vector[i] / se_beta_global
        ps = reg.t_to_p(ts, dof_global[i])
        ts_global.append(ts)
        ps_global.append(ps)

    # Reshape local stats from per-site rows to per-ROI rows, keyed by
    # a synthetic site name ("Site_0", "Site_1", ...).
    sites = ['Site_' + str(i) for i in range(len(all_local_stats_dicts))]
    all_local_stats_dicts = list(map(list, zip(*all_local_stats_dicts)))
    a_dict = [{
        key: value
        for key, value in zip(sites, all_local_stats_dicts[i])
    } for i in range(len(all_local_stats_dicts))]

    # Per-ROI global stats dictionaries.
    keys1 = [
        "avg_beta_vector", "r2_global", "ts_global", "ps_global",
        "dof_global"
    ]
    global_dict_list = []
    for index, _ in enumerate(y_labels):
        values = [
            avg_beta_vector[index], r_squared_global[index],
            ts_global[index].tolist(), ps_global[index], dof_global[index]
        ]
        my_dict = {key: value for key, value in zip(keys1, values)}
        global_dict_list.append(my_dict)

    # Combine ROI label, global stats and local stats for the output.
    dict_list = []
    keys2 = ["ROI", "global_stats", "local_stats"]
    for index, label in enumerate(y_labels):
        values = [label, global_dict_list[index], a_dict[index]]
        my_dict = {key: value for key, value in zip(keys2, values)}
        dict_list.append(my_dict)

    computation_output = {
        "output": {
            "regressions": dict_list
        },
        "success": True
    }
    return json.dumps(computation_output)
def remote_3(args):
    """Aggregate per-site error sums into global model fit statistics.

    Args:
        args (dictionary): {"input": {
                                <site>: {"SSE_local": ,
                                         "SST_local": ,
                                         "varX_matrix_local": ,
                                         "computation_phase": },
                                ...},
                            "cache": {"avg_beta_vector": ,
                                      "dof_global": }}

    Returns:
        computation_output (json): {"output": {"avg_beta_vector": ,
                                               "r_2_global": ,
                                               "ts_global": ,
                                               "ps_global": ,
                                               "dof_global": },
                                    "success": True}

    Comments:
        r^2 : goodness of fit, 1 - (SSE/SST).
        t   : coefficient divided by its standard error.
        p   : two-tailed p-value for the t statistic.
    """
    site_outputs = args["input"]
    cached = args["cache"]

    avg_beta_vector = cached["avg_beta_vector"]
    dof_global = cached["dof_global"]

    # Pool the per-site sums of squares and cross-product matrices.
    sse_total = np.sum(
        [site_outputs[name]["SSE_local"] for name in site_outputs])
    sst_total = np.sum(
        [site_outputs[name]["SST_local"] for name in site_outputs])
    varx_total = sum(
        np.array(site_outputs[name]["varX_matrix_local"])
        for name in site_outputs)

    r_squared = 1 - (sse_total / sst_total)
    mse = sse_total / dof_global

    # Standard errors come from the diagonal of the scaled inverse.
    covariance = mse * sp.linalg.inv(varx_total)
    standard_errors = np.sqrt(covariance.diagonal())
    t_stats = avg_beta_vector / standard_errors
    p_values = reg.t_to_p(t_stats, dof_global)

    result = {
        "output": {
            "avg_beta_vector": cached["avg_beta_vector"],
            "r_2_global": r_squared,
            "ts_global": t_stats.tolist(),
            "ps_global": p_values,
            "dof_global": cached["dof_global"]
        },
        "success": True
    }
    return json.dumps(result)
def local_1(args, computation_phase):
    """Run the one-shot local regression and package results for the remote.

    Args:
        args (dictionary): {"input": {"covariates": ,
                                      "dependents": ,
                                      "lambda": },
                            "cache": {}}
        computation_phase (string): label of the last-completed phase,
            echoed back to the remote unchanged.

    Returns:
        computation_output (json): {'output': {'beta_vector_local': ,
                                               'r_2_local': ,
                                               'ts_local': ,
                                               'ps_local': ,
                                               'mean_y_local': ,
                                               'count_local': ,
                                               'computation_phase': },
                                    'cache': {'covariates': ,
                                              'dependents': ,
                                              'lambda': }}

    Comments:
        Step 1: fit the local beta vector.
        Step 2: local fit statistics — r^2 = 1 - (SSE/SST), t = beta over
            its standard error, p = two-tailed p-value of t.
        Step 3: mean and count of the dependents, cached raw inputs.
    """
    data = args['input']
    covariates = data['covariates']
    dependents = data['dependents']
    ridge_lambda = data['lambda']

    # Covariate matrix with an intercept column of ones prepended.
    augmented = np.insert(covariates, 0, 1, axis=1)

    # Step 1: local beta vector.
    betas = reg.one_shot_regression(covariates, dependents, ridge_lambda)

    # Step 2: local fit statistics.
    goodness = reg.r_square(augmented, dependents, betas)
    t_stats = reg.t_value(augmented, dependents, betas)
    residual_dof = len(dependents) - len(betas)
    p_values = reg.t_to_p(t_stats, residual_dof)

    # Step 3: dependent-variable summary plus a cache for later phases.
    payload = {
        'output': {
            'beta_vector_local': betas.tolist(),
            'r_2_local': goodness,
            'ts_local': t_stats.tolist(),
            'ps_local': p_values,
            'mean_y_local': np.mean(dependents),
            'count_local': len(dependents),
            'computation_phase': computation_phase
        },
        'cache': {
            'covariates': covariates,
            'dependents': dependents,
            'lambda': ridge_lambda
        }
    }
    return json.dumps(
        payload, sort_keys=True, indent=4, separators=(',', ': '))
def remote_2(args):
    """Compute the global model fit statistics (r_2, t, p).

    Args:
        args (dictionary): {'input': {
                                <site>: {'SSE_local': ,
                                         'SST_local': ,
                                         'varX_matrix_local': ,
                                         'computation_phase': },
                                ...},
                            'cache': {'avg_beta_vector': ,
                                      'dof_global': }}

    Returns:
        computation_output (json): {'output': {'avg_beta_vector': ,
                                               'r_2_global': ,
                                               'ts_global': ,
                                               'ps_global': ,
                                               'dof_global': },
                                    'success': True}
    """
    cached = args['cache']
    site_results = args['input']

    avg_beta_vector = cached['avg_beta_vector']
    dof_global = cached['dof_global']

    # Pool the per-site sums of squares and cross-product matrices.
    sse = np.sum([site_results[k]['SSE_local'] for k in site_results])
    sst = np.sum([site_results[k]['SST_local'] for k in site_results])
    varx = sum(
        np.array(site_results[k]['varX_matrix_local'])
        for k in site_results)

    r_squared = 1 - (sse / sst)
    mse = sse / dof_global

    # Standard errors from the diagonal of the scaled inverse matrix.
    covariance = mse * np.linalg.inv(varx)
    standard_errors = np.sqrt(covariance.diagonal())
    t_stats = avg_beta_vector / standard_errors
    p_values = reg.t_to_p(t_stats, dof_global)

    result = {
        'output': {
            'avg_beta_vector': cached['avg_beta_vector'],
            'r_2_global': r_squared,
            'ts_global': t_stats.tolist(),
            'ps_global': p_values,
            'dof_global': cached['dof_global'],
        },
        'success': True
    }
    return json.dumps(result)