def feedforward_opt(app, X, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out, num_gpus):
    """Build and run the three-stage forward pass with biases; return timestamps.

    Every input is wrapped with ``GraphArray.from_ba`` against a
    ``ClusterState`` of shape ``(num_gpus, 1)``.  Three ``forward`` +
    ``opt.sigmoid`` stages are chained, and the resulting graph is computed
    and touched.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array for the first stage.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three.
        B_1, B_2, B_out: Bias arrays for stages one, two and three.
        num_gpus: First dimension of the cluster shape.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        wrapping step and after the graph has been built.  ``endtime`` is
        sampled *before* ``opt.compute_graph_array`` and ``touch`` run.
    """
    state = ClusterState((num_gpus, 1), app.system)

    def as_graph(block_array):
        # Bind a block array to the shared cluster state.
        return GraphArray.from_ba(block_array, state)

    # Section 1: wrap the inputs (order kept: one, X, weights, biases).
    ones: GraphArray = as_graph(app.one)
    activation = as_graph(X)
    w_first = as_graph(W_in_1)
    w_second = as_graph(W_1_2)
    w_last = as_graph(W_2_out)
    b_first = as_graph(B_1)
    b_second = as_graph(B_2)
    b_last = as_graph(B_out)
    initend = time.time()

    # Section 2: chain forward -> sigmoid for each stage.
    stages = ((w_first, b_first), (w_second, b_second), (w_last, b_last))
    for weights, bias in stages:
        pre_activation: GraphArray = forward(app, activation, weights, bias)
        activation = opt.sigmoid(app, pre_activation, ones)
    endtime = time.time()

    # Materialise the prediction only after the second timestamp is taken.
    prediction: BlockArray = opt.compute_graph_array(app, activation)
    prediction.touch()
    return initend, endtime
def feedforward_opt(app, X, W_in_1, W_1_2, W_2_out, num_gpus):
    """Run the bias-free three-stage forward pass and return the prediction.

    The weights are wrapped with ``GraphArray.from_ba`` and passed to
    ``distribute_weights`` before the input is wrapped.  Three ``forward`` +
    ``opt.sigmoid`` stages are then chained and the final graph is computed.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array for the first stage.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three.
        num_gpus: First dimension of the cluster shape.

    Returns:
        The array produced by ``opt.compute_graph_array`` for the last
        sigmoid, after ``touch()`` has been called on it.
    """
    cluster_state = ClusterState((num_gpus, 1), app.system)

    # Section 1: wrap the weights first, then distribute them.
    weight_arrays = (W_in_1, W_1_2, W_2_out)
    weight_graphs = [GraphArray.from_ba(w, cluster_state) for w in weight_arrays]

    # NOTE(review): distribute_weights is defined elsewhere; presumably it
    # copies each block to every node of the cluster -- confirm.
    for w in weight_arrays:
        distribute_weights(w, cluster_state)

    one_ga: GraphArray = GraphArray.from_ba(app.one, cluster_state)
    activation = GraphArray.from_ba(X, cluster_state)

    # Section 2: chain forward -> sigmoid for each stage.
    for w_ga in weight_graphs:
        pre_activation: GraphArray = forward(app, activation, w_ga)
        activation = opt.sigmoid(app, pre_activation, one_ga)

    y_predict_ga_ba: BlockArray = opt.compute_graph_array(app, activation)
    y_predict_ga_ba.touch()
    return y_predict_ga_ba
def feedforward_opt(app, X, W_in_1, W_1_2, W_2_out, num_gpus):
    """Build and run the bias-free three-stage forward pass; return timestamps.

    ``app.one``, ``X`` and the three weights are wrapped with
    ``GraphArray.from_ba``; ``app.one`` and ``X`` are additionally passed to
    ``distribute_weights``.  Three ``forward`` + ``opt.sigmoid`` stages are
    chained and the resulting graph is computed and touched.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array for the first stage.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three.
        num_gpus: First dimension of the cluster shape.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        setup step and after the graph has been built.  ``endtime`` is
        sampled *before* ``opt.compute_graph_array`` and ``touch`` run.
    """
    state = ClusterState((num_gpus, 1), app.system)

    # Section 1: wrap everything (order kept: one, X, then the weights).
    ones: GraphArray = GraphArray.from_ba(app.one, state)
    activation = GraphArray.from_ba(X, state)
    layer_weights = [
        GraphArray.from_ba(weights, state)
        for weights in (W_in_1, W_1_2, W_2_out)
    ]

    # Only the scalar one and the input are distributed in this variant.
    distribute_weights(app.one, state)
    distribute_weights(X, state)
    initend = time.time()

    # Section 2: chain forward -> sigmoid for each stage.
    for weights_ga in layer_weights:
        pre_activation: GraphArray = forward(app, activation, weights_ga)
        activation = opt.sigmoid(app, pre_activation, ones)
    endtime = time.time()

    # Materialise the prediction only after the second timestamp is taken.
    prediction: BlockArray = opt.compute_graph_array(app, activation)
    prediction.touch()
    return initend, endtime
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, num_gpus, verbose=False):
    """Run one forward/backward step of the bias-free network; return timestamps.

    The forward pass chains ``forward``, ``opt.sigmoid`` and
    ``opt.sigmoid_deriv`` for three stages.  The backward pass collapses the
    per-stage deltas and weight gradients with ``opt.collapse_graph_array``,
    computes the gradients, and forms ``W - LR * dW`` with ``LR = app.one``.

    The updated weights are bound to local names and touched; they are not
    returned to the caller.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array.
        y: Target array.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three.
        num_gpus: First dimension of the cluster shape.
        verbose: When true, print a progress message before most steps.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        forward graph is built and after the gradient graphs are collapsed
        (before ``opt.compute_graph_array`` runs).
    """

    def log(message):
        if verbose:
            print(message)

    # -- forward propagation --
    learning_rate = app.one
    state = ClusterState((num_gpus, 1), app.system)
    ones: GraphArray = GraphArray.from_ba(app.one, state)
    x_ga = GraphArray.from_ba(X, state)
    target_ga = GraphArray.from_ba(y, state)
    w1_ga = GraphArray.from_ba(W_in_1, state)
    w2_ga = GraphArray.from_ba(W_1_2, state)
    w3_ga = GraphArray.from_ba(W_2_out, state)

    log("forward Z_1_ga")
    z1: GraphArray = forward(app, x_ga, w1_ga)
    log("forward S_1_ga")
    s1: GraphArray = opt.sigmoid(app, z1, ones)
    log("forward F_1_ga")
    f1: GraphArray = opt.sigmoid_deriv(app, z1, ones)

    log("forward Z_2_ga")
    z2: GraphArray = forward(app, s1, w2_ga)
    s2: GraphArray = opt.sigmoid(app, z2, ones)
    f2: GraphArray = opt.sigmoid_deriv(app, z2, ones)

    log("forward Z_out_ga")
    z_out: GraphArray = forward(app, s2, w3_ga)
    log("forward y_predict_ga")
    predicted: GraphArray = opt.sigmoid(app, z_out, ones)
    log("forward F_out_ga")
    f_out: GraphArray = opt.sigmoid_deriv(app, z_out, ones)
    initend = time.time()

    # -- back propagation --
    # Operands are evaluated in the same left-to-right order as before.
    log("collapse D_out_ga")
    f_out_t = f_out.T
    residual_t = (predicted - target_ga).T
    delta_out = opt.collapse_graph_array(app, f_out_t * residual_t)

    log("collapse D_2_ga")
    delta_2 = opt.collapse_graph_array(app, f2.T * (w3_ga @ delta_out))

    log("collapse D_1_ga")
    delta_1 = opt.collapse_graph_array(app, f1.T * (w2_ga @ delta_2))
    distribute_graph_array(delta_1, state)

    log("collapse_graph_array dW_in_1_ga")
    grad_w1_ga = opt.collapse_graph_array(app, (delta_1 @ x_ga).T)
    log("collapse_graph_array dW_1_2_ga")
    grad_w2_ga = opt.collapse_graph_array(app, (delta_2 @ s1).T)
    log("collapse_graph_array dW_2_out_ga")
    grad_w3_ga = opt.collapse_graph_array(app, (delta_out @ s2).T)
    endtime = time.time()

    # Materialise the gradients after the second timestamp.
    grad_w1: BlockArray = opt.compute_graph_array(app, grad_w1_ga)
    grad_w2: BlockArray = opt.compute_graph_array(app, grad_w2_ga)
    grad_w3: BlockArray = opt.compute_graph_array(app, grad_w3_ga)

    log("update W_in_1")
    new_w1 = W_in_1 - learning_rate * grad_w1
    log("update W_1_2")
    new_w2 = W_1_2 - learning_rate * grad_w2
    log("update W_2_out")
    new_w3 = W_2_out - learning_rate * grad_w3

    for updated in (new_w1, new_w2, new_w3):
        updated.touch()
    return initend, endtime
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out, num_gpus):
    """Run one forward/backward step of the biased network; return new parameters.

    The forward pass chains ``forward``, ``opt.sigmoid`` and
    ``opt.sigmoid_deriv``.  The deltas are collapsed with
    ``opt.collapse_graph_array`` and computed, then ``update_weight`` and
    ``update_bias`` are applied with ``LR = app.one``.

    Unlike the hidden stages, the output delta is ``(y_predict - y).T`` with
    no ``sigmoid_deriv`` factor.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array.
        y: Target array.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three.
        B_1, B_2, B_out: Bias arrays for stages one, two and three.
        num_gpus: First dimension of the cluster shape.

    Returns:
        ``(W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)`` as returned by
        ``update_weight`` / ``update_bias``, each touched before returning.
    """
    # -- forward propagation --
    learning_rate = app.one
    state = ClusterState((num_gpus, 1), app.system)

    def as_graph(block_array):
        # Bind a block array to the shared cluster state.
        return GraphArray.from_ba(block_array, state)

    ones: GraphArray = as_graph(app.one)
    x_ga = as_graph(X)
    target_ga = as_graph(y)
    w1_ga = as_graph(W_in_1)
    w2_ga = as_graph(W_1_2)
    w3_ga = as_graph(W_2_out)
    b1_ga = as_graph(B_1)
    b2_ga = as_graph(B_2)
    b3_ga = as_graph(B_out)

    z1: GraphArray = forward(app, x_ga, w1_ga, b1_ga)
    s1: GraphArray = opt.sigmoid(app, z1, ones)
    f1: GraphArray = opt.sigmoid_deriv(app, z1, ones)

    z2: GraphArray = forward(app, s1, w2_ga, b2_ga)
    s2: GraphArray = opt.sigmoid(app, z2, ones)
    f2: GraphArray = opt.sigmoid_deriv(app, z2, ones)

    z_out: GraphArray = forward(app, s2, w3_ga, b3_ga)
    predicted: GraphArray = opt.sigmoid(app, z_out, ones)

    # -- back propagation --
    delta_out_ga = opt.collapse_graph_array(app, (predicted - target_ga).T)
    delta_2_ga = opt.collapse_graph_array(app, f2.T * (w3_ga @ delta_out_ga))
    delta_1_ga = opt.collapse_graph_array(app, f1.T * (w2_ga @ delta_2_ga))

    # Materialise the deltas and hidden activations needed by the updates.
    delta_out = opt.compute_graph_array(app, delta_out_ga)
    delta_2 = opt.compute_graph_array(app, delta_2_ga)
    delta_1 = opt.compute_graph_array(app, delta_1_ga)
    s1_ba = opt.compute_graph_array(app, s1)
    s2_ba = opt.compute_graph_array(app, s2)

    new_w1: BlockArray = update_weight(app, learning_rate, W_in_1, delta_1, X)
    new_w2: BlockArray = update_weight(app, learning_rate, W_1_2, delta_2, s1_ba)
    new_w3: BlockArray = update_weight(app, learning_rate, W_2_out, delta_out, s2_ba)
    new_b1: BlockArray = update_bias(app, learning_rate, B_1, delta_1)
    new_b2: BlockArray = update_bias(app, learning_rate, B_2, delta_2)
    new_b3: BlockArray = update_bias(app, learning_rate, B_out, delta_out)

    updated = (new_w1, new_w2, new_w3, new_b1, new_b2, new_b3)
    for parameter in updated:
        parameter.touch()
    return updated
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, num_gpus, verbose=False):
    """Run one forward/backward step of the bias-free network; return timestamps.

    The forward pass chains ``forward``, ``opt.sigmoid`` and
    ``opt.sigmoid_deriv`` for three stages.  The backward pass collapses the
    per-stage deltas and weight gradients with ``opt.collapse_graph_array``,
    computes the gradients and subtracts them from the weights with ``-=``.
    No learning-rate factor is applied in this variant.

    Args:
        app: Application object; ``app.system`` and ``app.one`` are read here.
        X: Input array.
        y: Target array.
        W_in_1, W_1_2, W_2_out: Weight arrays for stages one, two and three;
            updated with ``-=`` and touched.  NOTE(review): whether the
            caller's arrays are mutated depends on how the array type
            implements in-place subtraction -- confirm.
        num_gpus: First dimension of the cluster shape.
        verbose: When true, print a progress message before most steps.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        forward graph is built and after the gradient graphs are collapsed
        (before ``opt.compute_graph_array`` runs).
    """

    def log(message):
        if verbose:
            print(message)

    # -- forward propagation --
    cluster_state = ClusterState((num_gpus, 1), app.system)
    one_ga: GraphArray = GraphArray.from_ba(app.one, cluster_state)
    X_ga = GraphArray.from_ba(X, cluster_state)
    y_ga = GraphArray.from_ba(y, cluster_state)
    W_in_1_ga = GraphArray.from_ba(W_in_1, cluster_state)
    W_1_2_ga = GraphArray.from_ba(W_1_2, cluster_state)
    W_2_out_ga = GraphArray.from_ba(W_2_out, cluster_state)

    # Only the scalar one is distributed in this variant.
    distribute_weights(app.one, cluster_state)

    log("forward Z_1_ga")
    Z_1_ga: GraphArray = forward(app, X_ga, W_in_1_ga)
    log("forward S_1_ga")
    S_1_ga: GraphArray = opt.sigmoid(app, Z_1_ga, one_ga)
    log("forward F_1_ga")
    F_1_ga: GraphArray = opt.sigmoid_deriv(app, Z_1_ga, one_ga)

    log("forward Z_2_ga")
    Z_2_ga: GraphArray = forward(app, S_1_ga, W_1_2_ga)
    S_2_ga: GraphArray = opt.sigmoid(app, Z_2_ga, one_ga)
    F_2_ga: GraphArray = opt.sigmoid_deriv(app, Z_2_ga, one_ga)

    log("forward Z_out_ga")
    Z_out_ga: GraphArray = forward(app, S_2_ga, W_2_out_ga)
    log("forward y_predict_ga")
    y_predict_ga: GraphArray = opt.sigmoid(app, Z_out_ga, one_ga)
    log("forward F_out_ga")
    F_out_ga: GraphArray = opt.sigmoid_deriv(app, Z_out_ga, one_ga)
    initend = time.time()

    # NOTE(review): the original indentation was lost; all three banner
    # prints are assumed to be guarded by ``verbose`` -- confirm.
    if verbose:
        for _ in range(3):
            print(
                "-----------------------------start back propogation-------------------------------"
            )

    # -- back propagation --
    # Shapes recorded in the original author's debug notes (not verified
    # here): F_out_ga (1000,), W_2_out_ga (2048,), D_out_ga (1000,),
    # F_2_ga (1000, 2048).
    log("collapse D_out_ga")
    D_out_ga = opt.collapse_graph_array(app, F_out_ga.T * (y_predict_ga - y_ga).T)
    log("collapse D_2_ga")
    D_2_ga = opt.collapse_graph_array(app, F_2_ga.T * (W_2_out_ga @ D_out_ga))
    log("collapse D_1_ga")
    D_1_ga = opt.collapse_graph_array(app, F_1_ga.T * (W_1_2_ga @ D_2_ga))
    distribute_graph_array(D_1_ga, cluster_state)

    # Original note on the first gradient: "now all executed on GPU 0".
    log("collapse_graph_array dW_in_1_ga")
    dW_in_1_ga = opt.collapse_graph_array(app, (D_1_ga @ X_ga).T)
    log("collapse_graph_array dW_1_2_ga")
    dW_1_2_ga = opt.collapse_graph_array(app, (D_2_ga @ S_1_ga).T)
    log("collapse_graph_array dW_2_out_ga")
    dW_2_out_ga = opt.collapse_graph_array(app, (D_out_ga @ S_2_ga).T)
    endtime = time.time()

    # Materialise the gradients after the second timestamp.
    dW_in_1_ga_ba: BlockArray = opt.compute_graph_array(app, dW_in_1_ga)
    dW_1_2_ga_ba: BlockArray = opt.compute_graph_array(app, dW_1_2_ga)
    dW_2_out_ga_ba: BlockArray = opt.compute_graph_array(app, dW_2_out_ga)

    # Augmented assignment is kept so any in-place semantics are unchanged.
    log("update W_in_1")
    W_in_1 -= dW_in_1_ga_ba
    log("update W_1_2")
    W_1_2 -= dW_1_2_ga_ba
    log("update W_2_out")
    W_2_out -= dW_2_out_ga_ba

    W_in_1.touch()
    W_1_2.touch()
    W_2_out.touch()
    return initend, endtime