def remote_site(args, computation_phase):
    '''Embed the shared (remote) dataset with t-SNE and save the result.

    The high-dimensional shared data is loaded from ``args["shared_X"]``,
    normalized with ``normalize_columns`` and handed to ``tsne`` together
    with a random Gaussian starting layout. The resulting low-dimensional
    coordinates are written to ``Y_values.txt`` in the current working
    directory, one sample per line with tab-separated columns.

    Args:
        args (dict): Must provide
            "shared_X" (str): path of a text file readable by np.loadtxt,
            "no_dims" (int): number of embedding dimensions,
            "initial_dims" (int): forwarded to ``tsne`` (described by the
                original author as the PCA output dimensionality),
            "perplexity" (int): forwarded to ``tsne``.
            Other keys (e.g. "shared_Label", "max_iter") are not read here.
        computation_phase (str): forwarded to ``tsne`` unchanged.

    Returns:
        dict: the same ``args`` object, mutated in place so that
        ``args["shared_Y"]`` holds the file name "Y_values.txt".
    '''
    shared_X = np.loadtxt(args["shared_X"])
    no_dims = args["no_dims"]
    initial_dims = args["initial_dims"]
    perplexity = args["perplexity"]

    shared_X = normalize_columns(shared_X)
    # Unpacking (rather than shape[0]) keeps the implicit "must be 2-D" check.
    sharedRows, _ = shared_X.shape
    init_Y = np.random.randn(sharedRows, no_dims)

    # shared data computation in tsne
    shared_Y = tsne(
        shared_X,
        init_Y,
        sharedRows,
        no_dims,
        initial_dims,
        perplexity,
        computation_phase=computation_phase)

    # Write every embedding column so the file matches ``no_dims``; the
    # output is unchanged for the usual two-dimensional case.
    with open("Y_values.txt", "w") as f:
        for row in shared_Y:
            f.write('\t'.join(str(value) for value in row) + '\n')

    args["shared_Y"] = "Y_values.txt"
    return args
def _write_xy_rows(path, rows):
    '''Write the first two columns of each row to *path*, tab-separated.'''
    with open(path, "w") as fh:
        for row in rows:
            fh.write(str(row[0]) + '\t' + str(row[1]) + '\n')


def local_site1(args, compAvgError, computation_phase):
    '''Run one phase of the multi-shot computation for local site 1.

    State is carried between calls as attributes on the function object
    (``local_site1.Y``, ``.dY``, ``.iY``, ``.gains``, ``.P``, ``.n``,
    ``.sharedRows``, ``.sharedColumns``, ``.Q``). The 'local' phase creates
    that state, so it must run before 'computation' or 'final'.

    Args:
        args (dict): Read in the 'local' phase:
            "shared_X" (str): path to the shared high-dimensional data,
            "shared_Y" (str): path to the shared low-dimensional data,
            "no_dims" (int), "initial_dims" (int), "perplexity" (int):
                forwarded to ``tsne``.
            In the 'computation' phase only "shared_Y" is read.
            Not read in the 'final' phase.
        compAvgError (str): JSON text, used only in the 'computation'
            phase, with keys output.error, output.avgX and output.avgY.
        computation_phase (str): 'local', 'computation' or 'final'.

    Returns:
        'local': dict ``{"localSite1SharedY": "site1SharedY.txt"}``.
        'computation': tuple of
            ``{"localSite1SharedY": "site1SharedY.txt"}``,
            ``{"localSite1SharedIY": "site1SharedIY.txt"}`` and a JSON
            string with output.MeanX, output.MeanY and output.error.
        'final': dict ``{"localSite1": "local_site1Y.txt"}``.
        Any other phase: None.

    All result files are written to the current working directory and
    contain the first two embedding columns only.
    '''
    C = 0

    # Phases are compared with ``==``: ``is`` tests object identity and is
    # False for equal strings that are not the same interned object.
    if computation_phase == 'local':
        shared_X = np.loadtxt(args["shared_X"])
        shared_Y = np.loadtxt(args["shared_Y"])
        no_dims = args["no_dims"]
        initial_dims = args["initial_dims"]
        perplexity = args["perplexity"]
        local_site1.sharedRows, local_site1.sharedColumns = shared_X.shape

        # Local high-dimensional data lives at a fixed relative path.
        Site1Data = np.loadtxt("Site_1_Mnist_X.txt")

        # create combined list by local and remote data (shared rows first)
        combined_X = np.concatenate((shared_X, Site1Data), axis=0)
        combined_X = normalize_columns(combined_X)

        # create low dimensional position; shared rows start from the
        # embedding received from the remote site
        combined_Y = np.random.randn(combined_X.shape[0], no_dims)
        combined_Y[:shared_Y.shape[0], :] = shared_Y

        (local_site1.Y, local_site1.dY, local_site1.iY, local_site1.gains,
         local_site1.P, local_site1.n) = tsne(
            combined_X,
            combined_Y,
            local_site1.sharedRows,
            no_dims=no_dims,
            initial_dims=initial_dims,
            perplexity=perplexity,
            computation_phase=computation_phase)

        # save the shared rows of the local embedding for the remote site
        _write_xy_rows("site1SharedY.txt",
                       local_site1.Y[:local_site1.sharedRows])
        return {"localSite1SharedY": "site1SharedY.txt"}

    if computation_phase == 'computation':
        # argparse is kept for the caller-supplied JSON so that malformed
        # input fails exactly as before (argparse error / SystemExit).
        parser = argparse.ArgumentParser(
            description='''read in coinstac args for local computation''')
        parser.add_argument('--run', type=json.loads,
                            help='grab coinstac args')

        shared_Y = np.loadtxt(args["shared_Y"])
        local_site1.Y[:local_site1.sharedRows, :] = shared_Y

        compAvgError1 = parser.parse_args(['--run', compAvgError])
        C = compAvgError1.run['output']['error']

        # np.mean is used only to get an array of the right shape; both
        # entries are overwritten with the averages sent by the remote.
        demeanAvg = np.mean(local_site1.Y, 0)
        demeanAvg[0] = compAvgError1.run['output']['avgX']
        demeanAvg[1] = compAvgError1.run['output']['avgY']
        local_site1.Y = demeanL(local_site1.Y, demeanAvg)

        # NOTE(review): ``iter`` is not defined in this function; unless the
        # module defines a global named ``iter`` this passes the builtin.
        # NOTE(review): the returned ``iY`` is not stored back on
        # ``local_site1.iY``; presumably master_child updates it in place.
        # Confirm both against master_child.
        local_site1.Y, iY, local_site1.Q, C, local_site1.P = master_child(
            local_site1.Y, local_site1.dY, local_site1.iY,
            local_site1.gains, local_site1.n, local_site1.sharedRows,
            local_site1.P, iter, C)

        local_site1.Y[local_site1.sharedRows:, :] = updateL(
            local_site1.Y[local_site1.sharedRows:, :],
            local_site1.iY[local_site1.sharedRows:, :])

        # save the shared rows of Y and iY for the remote site
        _write_xy_rows("site1SharedY.txt",
                       local_site1.Y[:local_site1.sharedRows])
        _write_xy_rows("site1SharedIY.txt",
                       local_site1.iY[:local_site1.sharedRows])

        meanValue = np.mean(local_site1.Y, 0)
        comp = {'output': {'MeanX': meanValue[0],
                           'MeanY': meanValue[1],
                           'error': C}}
        return ({"localSite1SharedY": "site1SharedY.txt"},
                {"localSite1SharedIY": "site1SharedIY.txt"},
                json.dumps(comp, sort_keys=True, indent=4,
                           separators=(',', ':')))

    if computation_phase == 'final':
        # Only the rows belonging to this site (after the shared rows) are
        # written out.
        _write_xy_rows("local_site1Y.txt",
                       local_site1.Y[local_site1.sharedRows:])
        return {"localSite1": "local_site1Y.txt"}
def local_1(args):
    '''Embed shared + local data together and hand the shared part back.

    The shared high-dimensional data and its embedding are loaded as
    ``.npy`` files from ``args['state']['baseDirectory']`` (file names come
    from ``args['input']['shared_X']`` and ``args['input']['shared_y']``).
    The site's own data is read from ``local_site_data.txt`` in the same
    directory. Shared rows are stacked on top of the local rows, the stack
    is normalized, and ``tsne`` is run in the "local" phase with the shared
    rows initialized from the received embedding.

    Files written:
        transferDirectory: local_shared_Y.npy, local_shared_IY.npy
        cacheDirectory: local_Y.npy, local_dY.npy, local_IY.npy,
            local_P.npy, local_gains.npy, shared_Y.npy

    Args:
        args (dict): uses ``args['state']`` (baseDirectory,
            transferDirectory, cacheDirectory), ``args['input']``
            (shared_X, shared_y) and ``args['cache']`` (no_dims,
            initial_dims, perplexity).

    Returns:
        str: JSON with an "output" section (transfer file name and
        computation phase 'local_1') and a "cache" section (cache file
        names plus ``local_n`` and ``shared_rows``).
    '''
    # NOTE(review): allow_pickle=True will unpickle arbitrary objects from
    # files received from another site; confirm whether it is needed.
    input_dir = args['state']['baseDirectory']
    remote_X = np.load(
        os.path.join(input_dir, args['input']['shared_X']),
        allow_pickle=True)
    remote_Y = np.load(
        os.path.join(input_dir, args['input']['shared_y']),
        allow_pickle=True)

    cache_in = args["cache"]
    embed_dims = cache_in["no_dims"]
    pca_dims = cache_in["initial_dims"]
    perp = cache_in["perplexity"]
    n_shared_rows, _ = remote_X.shape

    site_path = os.path.join(args["state"]["baseDirectory"],
                             'local_site_data.txt')
    with open(site_path) as fh:
        site_lines = fh.readlines()
    site_X = np.asarray(np.loadtxt(site_lines))

    # Shared rows go first, local rows after them.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start everywhere, then overwrite the shared rows with the
    # embedding received from the remote.
    stacked_Y = np.random.randn(stacked_X.shape[0], embed_dims)
    n_embedded = remote_Y.shape[0]
    stacked_Y[:n_embedded, :] = remote_Y

    local_Y, local_dY, local_iY, local_gains, local_P, local_n = tsne(
        stacked_X,
        stacked_Y,
        n_shared_rows,
        no_dims=embed_dims,
        initial_dims=pca_dims,
        perplexity=perp,
        computation_phase="local")

    # Shared slices are transferred to the remote.
    transfer_dir = args['state']['transferDirectory']
    np.save(os.path.join(transfer_dir, 'local_shared_Y.npy'),
            local_Y[:n_embedded, :])
    np.save(os.path.join(transfer_dir, 'local_shared_IY.npy'),
            local_iY[:n_embedded, :])

    # Full optimizer state is kept in the local cache directory.
    cache_dir = args['state']['cacheDirectory']
    cached_arrays = (
        ('local_Y.npy', local_Y),
        ('local_dY.npy', local_dY),
        ('local_IY.npy', local_iY),
        ('local_P.npy', local_P),
        ('local_gains.npy', local_gains),
        ('shared_Y.npy', remote_Y),
    )
    for file_name, array in cached_arrays:
        np.save(os.path.join(cache_dir, file_name), array)

    output_section = {
        "localSite1SharedY": 'local_shared_Y.npy',
        'computation_phase': 'local_1',
    }
    cache_section = {
        "local_Y": 'local_Y.npy',
        "local_dY": 'local_dY.npy',
        "local_IY": 'local_IY.npy',
        "local_P": 'local_P.npy',
        "local_n": local_n,
        "local_gains": 'local_gains.npy',
        "shared_rows": n_shared_rows,
        "shared_Y": 'shared_Y.npy',
    }
    return json.dumps({"output": output_section, "cache": cache_section})
def local_1(args):
    '''Embed shared + local data together and return everything inline.

    Simulator variant: the shared and site data are read from fixed paths
    under ``test/input/simulatorRun/``, the shared embedding arrives as a
    nested list in ``args["input"]["shared_y"]``, and all results are
    returned as lists inside the JSON instead of files.

    Args:
        args (dict): uses ``args["input"]["shared_y"]`` and
            ``args["cache"]`` (no_dims, initial_dims, perplexity).

    Returns:
        str: JSON with an "output" section (shared rows of Y and iY plus
        computation phase "local_1") and a "cache" section (full Y, dY,
        iY, P, gains as lists, ``local_n``, ``shared_rows`` and the
        received shared embedding).
    '''
    # Fixed simulator inputs; the shared set is the MNIST-2500 sample.
    remote_X = np.loadtxt('test/input/simulatorRun/mnist2500_X.txt')
    remote_Y = np.array(args["input"]["shared_y"])

    cache_in = args["cache"]
    embed_dims = cache_in["no_dims"]
    pca_dims = cache_in["initial_dims"]
    perp = cache_in["perplexity"]
    n_shared_rows, _ = remote_X.shape

    site_X = np.loadtxt(
        'test/input/simulatorRun/'
        'test_high_dimensional_site_1_mnist_data.txt')

    # Shared rows go first, local rows after them.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start everywhere, then overwrite the shared rows with the
    # embedding received from the remote.
    stacked_Y = np.random.randn(stacked_X.shape[0], embed_dims)
    n_embedded = remote_Y.shape[0]
    stacked_Y[:n_embedded, :] = remote_Y

    local_Y, local_dY, local_iY, local_gains, local_P, local_n = tsne(
        stacked_X,
        stacked_Y,
        n_shared_rows,
        no_dims=embed_dims,
        initial_dims=pca_dims,
        perplexity=perp,
        computation_phase="local")

    shared_part_Y = local_Y[:n_embedded, :]
    shared_part_IY = local_iY[:n_embedded, :]

    output_section = {
        "localSite1SharedY": shared_part_Y.tolist(),
        "localSite1SharedIY": shared_part_IY.tolist(),
        'computation_phase': "local_1",
    }
    cache_section = {
        "local_Y": local_Y.tolist(),
        "local_dY": local_dY.tolist(),
        "local_iY": local_iY.tolist(),
        "local_P": local_P.tolist(),
        "local_n": local_n,
        "local_gains": local_gains.tolist(),
        "shared_rows": n_shared_rows,
        "shared_y": remote_Y.tolist(),
    }
    return json.dumps({"output": output_section, "cache": cache_section})
def remote_1(args):
    '''Embed the shared dataset on the remote and return it inline.

    Simulator variant: the shared data and labels are read from fixed
    paths under ``test/input/simulatorRun/``. The data is normalized with
    ``normalize_columns`` and embedded by ``tsne`` in the "remote" phase
    from a random Gaussian start.

    Args:
        args (dict): uses ``args["input"]["local0"]`` for no_dims,
            initial_dims, perplexity and max_iterations.

    Returns:
        str: JSON whose "output" section holds the embedding as a nested
        list under "shared_y" plus computation phase 'remote_1', and
        whose "cache" section holds the same embedding and
        "max_iterations".
    '''
    data_X = np.loadtxt('test/input/simulatorRun/mnist2500_X.txt')
    # The labels are loaded but not used below; the read is kept so a
    # missing labels file still fails here.
    shared_Labels = np.loadtxt(
        'test/input/simulatorRun/mnist2500_labels.txt')

    site_params = args["input"]["local0"]
    embed_dims = site_params["no_dims"]
    pca_dims = site_params["initial_dims"]
    perp = site_params["perplexity"]
    iteration_cap = site_params["max_iterations"]

    data_X = normalize_columns(data_X)
    n_rows, _ = data_X.shape
    start_Y = np.random.randn(n_rows, embed_dims)

    embedded = tsne(
        data_X,
        start_Y,
        n_rows,
        embed_dims,
        pca_dims,
        perp,
        computation_phase="remote")

    embedded_as_list = embedded.tolist()
    result = {
        "output": {
            "shared_y": embedded_as_list,
            "computation_phase": 'remote_1',
        },
        "cache": {
            "shared_y": embedded_as_list,
            "max_iterations": iteration_cap,
        },
    }
    return json.dumps(result)
def remote_1(args):
    '''Embed the shared dataset on the remote and publish it as files.

    ``mnist2500_X.txt`` and ``mnist2500_labels.txt`` are read from
    ``args["state"]["baseDirectory"]``. The data is normalized with
    ``normalize_columns`` and embedded by ``tsne`` in the "remote" phase
    from a freshly seeded random Gaussian start.

    Files written:
        transferDirectory: shared_Y.npy, shared_X.npy (normalized data)
        cacheDirectory: shared_Y.npy

    Args:
        args (dict): uses ``args["state"]`` (baseDirectory,
            transferDirectory, cacheDirectory) and
            ``args["input"]["local0"]`` (no_dims, initial_dims,
            perplexity, max_iterations).

    Returns:
        str: JSON whose "output" section names the transferred files and
        the computation phase 'remote_1', and whose "cache" section names
        the cached embedding and carries "max_iterations".
    '''
    data_path = os.path.join(args["state"]["baseDirectory"],
                             'mnist2500_X.txt')
    with open(data_path) as fh:
        data_lines = fh.readlines()
    data_X = np.loadtxt(data_lines)

    label_path = os.path.join(args["state"]["baseDirectory"],
                              'mnist2500_labels.txt')
    with open(label_path) as fh1:
        label_lines = fh1.readlines()
    # The labels are parsed but not used below.
    shared_Labels = np.loadtxt(label_lines)

    site_params = args["input"]["local0"]
    embed_dims = site_params["no_dims"]
    pca_dims = site_params["initial_dims"]
    perp = site_params["perplexity"]
    iteration_cap = site_params["max_iterations"]

    data_X = normalize_columns(data_X)
    n_rows, _ = data_X.shape

    # Re-seed without an explicit seed before drawing the start layout.
    np.random.seed()
    start_Y = np.random.randn(n_rows, embed_dims)

    embedded = tsne(
        data_X,
        start_Y,
        n_rows,
        embed_dims,
        pca_dims,
        perp,
        computation_phase="remote")

    state = args['state']
    np.save(os.path.join(state['transferDirectory'], 'shared_Y.npy'),
            embedded)
    np.save(os.path.join(state['cacheDirectory'], 'shared_Y.npy'),
            embedded)
    np.save(os.path.join(state['transferDirectory'], 'shared_X.npy'),
            data_X)

    result = {
        "output": {
            "shared_y": 'shared_Y.npy',
            "shared_X": 'shared_X.npy',
            "computation_phase": 'remote_1',
        },
        "cache": {
            "shared_y": 'shared_Y.npy',
            "max_iterations": iteration_cap,
        },
    }
    return json.dumps(result)
def local_1(args):
    '''Embed shared + local data and return only the local embedding.

    The shared data and its embedding arrive as nested lists in
    ``args["input"]["shared_x"]`` and ``args["input"]["shared_y"]``; the
    site's own data is read from ``test/input/simulatorRun/site1_x.txt``.
    Shared rows are stacked on top of the local rows, the stack is
    normalized, and ``tsne`` is run in the 'local' phase with the shared
    rows initialized from the received embedding.

    Args:
        args (dict): uses ``args["input"]`` (shared_x, shared_y) and
            ``args["cache"]`` (no_dims, initial_dims, perplexity).

    Returns:
        str: JSON with "output" holding computation phase 'local_site'
        and "local_embedding" (the rows after the shared block, as a
        nested list), and an empty "cache" section.
    '''
    received = args["input"]
    remote_X = received["shared_x"]
    remote_Y = received["shared_y"]
    remote_X = np.array(remote_X)
    remote_Y = np.array(remote_Y)

    cache_in = args["cache"]
    embed_dims = cache_in["no_dims"]
    pca_dims = cache_in["initial_dims"]
    perp = cache_in["perplexity"]
    n_shared_rows, _ = remote_X.shape

    site_X = np.loadtxt('test/input/simulatorRun/site1_x.txt')
    # Unpacking keeps the implicit check that the site data is 2-D.
    _site_rows, _site_cols = site_X.shape

    # Shared rows go first, local rows after them.
    stacked_X = normalize_columns(
        np.concatenate((remote_X, site_X), axis=0))

    # Random start everywhere, then overwrite the shared rows with the
    # embedding received from the remote.
    stacked_Y = np.random.randn(stacked_X.shape[0], embed_dims)
    n_embedded = remote_Y.shape[0]
    stacked_Y[:n_embedded, :] = remote_Y

    # "local" here means the combined (shared + site) data set.
    full_embedding = tsne(
        stacked_X,
        stacked_Y,
        n_shared_rows,
        no_dims=embed_dims,
        initial_dims=pca_dims,
        perplexity=perp,
        computation_phase='local')

    site_embedding = full_embedding[n_embedded:, :]

    result = {
        "output": {
            "computation_phase": 'local_site',
            "local_embedding": site_embedding.tolist(),
        },
        "cache": {},
    }
    return json.dumps(result)
def local_site(args, computation_phase):
    '''Embed shared + local data and save the local part of the embedding.

    The shared high-dimensional data and its embedding are loaded from the
    text files named in ``args``; the site's own data is read from
    ``Site_1_Mnist_X.txt`` in the current working directory. Shared rows
    are stacked on top of the local rows, the stack is normalized with
    ``normalize_columns``, and ``tsne`` is run with the shared rows
    initialized from the received embedding. The rows after the shared
    block are written to ``local_site1.txt``, one sample per line with
    tab-separated columns.

    Args:
        args (dict): Must provide
            "shared_X" (str): path to the shared high-dimensional data,
            "shared_Y" (str): path to the shared low-dimensional data,
            "no_dims" (int): number of embedding dimensions,
            "initial_dims" (int): forwarded to ``tsne``,
            "perplexity" (int): forwarded to ``tsne``.
        computation_phase (str): forwarded to ``tsne`` unchanged.

    Returns:
        dict: ``{"local": "local_site1.txt"}``.
    '''
    shared_X = np.loadtxt(args["shared_X"])
    shared_Y = np.loadtxt(args["shared_Y"])
    no_dims = args["no_dims"]
    initial_dims = args["initial_dims"]
    perplexity = args["perplexity"]
    # Unpacking (rather than shape[0]) keeps the implicit "must be 2-D" check.
    sharedRows, _ = shared_X.shape

    # load high dimensional local site data from its fixed relative path
    Site1Data = np.loadtxt("Site_1_Mnist_X.txt")

    # create combined list by local and remote data. In combined_X remote
    # data will be placed on the top of local site data
    combined_X = np.concatenate((shared_X, Site1Data), axis=0)
    combined_X = normalize_columns(combined_X)

    # create low dimensional position; shared rows start from the
    # embedding received from the remote site
    combined_Y = np.random.randn(combined_X.shape[0], no_dims)
    combined_Y[:shared_Y.shape[0], :] = shared_Y

    # local data computation in tsne. Here "local" means the combined data
    # (remote data placed on top of local site data).
    Y_plot = tsne(
        combined_X,
        combined_Y,
        sharedRows,
        no_dims=no_dims,
        initial_dims=initial_dims,
        perplexity=perplexity,
        computation_phase=computation_phase)

    # Write every embedding column so the file matches ``no_dims``; the
    # output is unchanged for the usual two-dimensional case.
    with open("local_site1.txt", "w") as f1:
        for row in Y_plot[sharedRows:]:
            f1.write('\t'.join(str(value) for value in row) + '\n')

    return {"local": "local_site1.txt"}