def matmul_opt(app, W, D, num_gpus):
    """Build and execute the optimized product ``W @ D``; return two timestamps.

    Args:
        app: Application object; ``app.system`` is passed to ``ClusterState``.
        W: Left operand, wrapped with ``GraphArray.from_ba``.
        D: Right operand, wrapped with ``GraphArray.from_ba``.
        num_gpus: First entry of the ``(num_gpus, 1)`` cluster shape.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken right after the
        operands were wrapped as graph arrays and right after the product
        graph was collapsed. Both are recorded before
        ``opt.compute_graph_array`` is called.
    """
    # Section 1: cluster layout and graph-array operands.
    layout = ClusterState((num_gpus, 1), app.system)
    lhs: GraphArray = GraphArray.from_ba(W, layout)
    rhs: GraphArray = GraphArray.from_ba(D, layout)
    init_done = time.time()

    # Section 2: collapse the product graph.
    product: GraphArray = opt.collapse_graph_array(app, lhs @ rhs)
    collapse_done = time.time()

    # The computed array is touched and then dropped; only the timings are
    # returned. (touch() presumably blocks until the result exists - confirm.)
    opt.compute_graph_array(app, product).touch()
    return init_done, collapse_done
def forward(app, X, W):
    """Return ``opt.collapse_graph_array`` applied to the product ``X @ W``."""
    return opt.collapse_graph_array(app, X @ W)
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, num_gpus, verbose=False):
    """Run one forward/backward step of the three-weight sigmoid network.

    The inputs are wrapped as graph arrays, pushed through three
    ``forward`` + ``opt.sigmoid`` stages, the deltas and weight gradients are
    collapsed, and the gradients are then computed and subtracted from the
    weights (scaled by ``LR = app.one``).

    Args:
        app: Application object (``app.one`` and ``app.system`` are read).
        X: Input block array.
        y: Target block array.
        W_in_1, W_1_2, W_2_out: Weight block arrays of the three layers.
        num_gpus: First entry of the ``(num_gpus, 1)`` cluster shape.
        verbose: When true, print a progress line before most stages.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        forward graph was built and after the last gradient graph was
        collapsed. The updated weights are touched but not returned.
    """

    def trace(message):
        # Progress output is opt-in.
        if verbose:
            print(message)

    # --forward propagation--
    LR = app.one
    cluster_state = ClusterState((num_gpus, 1), app.system)

    def as_graph(block_array):
        return GraphArray.from_ba(block_array, cluster_state)

    one_ga: GraphArray = as_graph(app.one)
    X_ga = as_graph(X)
    y_ga = as_graph(y)
    W_in_1_ga = as_graph(W_in_1)
    W_1_2_ga = as_graph(W_1_2)
    W_2_out_ga = as_graph(W_2_out)

    # Layer 1.
    trace("forward Z_1_ga")
    pre_1: GraphArray = forward(app, X_ga, W_in_1_ga)
    trace("forward S_1_ga")
    act_1: GraphArray = opt.sigmoid(app, pre_1, one_ga)
    trace("forward F_1_ga")
    slope_1: GraphArray = opt.sigmoid_deriv(app, pre_1, one_ga)

    # Layer 2 (only the pre-activation is announced).
    trace("forward Z_2_ga")
    pre_2: GraphArray = forward(app, act_1, W_1_2_ga)
    act_2: GraphArray = opt.sigmoid(app, pre_2, one_ga)
    slope_2: GraphArray = opt.sigmoid_deriv(app, pre_2, one_ga)

    # Output layer.
    trace("forward Z_out_ga")
    pre_out: GraphArray = forward(app, act_2, W_2_out_ga)
    trace("forward y_predict_ga")
    prediction: GraphArray = opt.sigmoid(app, pre_out, one_ga)
    trace("forward F_out_ga")
    slope_out: GraphArray = opt.sigmoid_deriv(app, pre_out, one_ga)
    initend = time.time()

    # --back propagation--
    trace("collapse D_out_ga")
    delta_out = opt.collapse_graph_array(app, slope_out.T * (prediction - y_ga).T)
    trace("collapse D_2_ga")
    delta_2 = opt.collapse_graph_array(app, slope_2.T * (W_2_out_ga @ delta_out))
    trace("collapse D_1_ga")
    delta_1 = opt.collapse_graph_array(app, slope_1.T * (W_1_2_ga @ delta_2))
    distribute_graph_array(delta_1, cluster_state)

    # Weight-gradient graphs, in layer order: (delta @ layer input).T
    gradient_graphs = []
    for label, delta, layer_input in (
        ("collapse_graph_array dW_in_1_ga", delta_1, X_ga),
        ("collapse_graph_array dW_1_2_ga", delta_2, act_1),
        ("collapse_graph_array dW_2_out_ga", delta_out, act_2),
    ):
        trace(label)
        gradient_graphs.append(
            opt.collapse_graph_array(app, (delta @ layer_input).T)
        )
    endtime = time.time()

    gradients = [opt.compute_graph_array(app, graph) for graph in gradient_graphs]

    # Apply the updates, then touch every new weight array.
    new_weights = []
    for label, weight, gradient in zip(
        ("update W_in_1", "update W_1_2", "update W_2_out"),
        (W_in_1, W_1_2, W_2_out),
        gradients,
    ):
        trace(label)
        new_weights.append(weight - LR * gradient)
    for weight in new_weights:
        weight.touch()
    return initend, endtime
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, B_1, B_2, B_out, num_gpus):
    """Run one training step of the biased sigmoid network and return new parameters.

    Args:
        app: Application object (``app.one`` and ``app.system`` are read).
        X: Input block array.
        y: Target block array.
        W_in_1, W_1_2, W_2_out: Weight block arrays of the three layers.
        B_1, B_2, B_out: Bias block arrays of the three layers.
        num_gpus: First entry of the ``(num_gpus, 1)`` cluster shape.

    Returns:
        ``(W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)`` as produced by
        ``update_weight`` / ``update_bias``; each one is touched before
        returning.
    """
    LR = app.one
    cluster_state = ClusterState((num_gpus, 1), app.system)

    def as_graph(block_array):
        return GraphArray.from_ba(block_array, cluster_state)

    def materialize(graph_array):
        return opt.compute_graph_array(app, graph_array)

    # --forward propagation--
    one_ga: GraphArray = as_graph(app.one)
    X_ga = as_graph(X)
    y_ga = as_graph(y)
    W_in_1_ga = as_graph(W_in_1)
    W_1_2_ga = as_graph(W_1_2)
    W_2_out_ga = as_graph(W_2_out)
    B_1_ga = as_graph(B_1)
    B_2_ga = as_graph(B_2)
    B_out_ga = as_graph(B_out)

    # NOTE(review): forward is called here with a fourth (bias) argument;
    # confirm that the forward in scope accepts it.
    pre_1: GraphArray = forward(app, X_ga, W_in_1_ga, B_1_ga)
    act_1: GraphArray = opt.sigmoid(app, pre_1, one_ga)
    slope_1: GraphArray = opt.sigmoid_deriv(app, pre_1, one_ga)

    pre_2: GraphArray = forward(app, act_1, W_1_2_ga, B_2_ga)
    act_2: GraphArray = opt.sigmoid(app, pre_2, one_ga)
    slope_2: GraphArray = opt.sigmoid_deriv(app, pre_2, one_ga)

    pre_out: GraphArray = forward(app, act_2, W_2_out_ga, B_out_ga)
    prediction: GraphArray = opt.sigmoid(app, pre_out, one_ga)

    # --back propagation--
    # The output delta is the transposed prediction error, with no
    # sigmoid-derivative factor.
    delta_out_ga = opt.collapse_graph_array(app, (prediction - y_ga).T)
    delta_2_ga = opt.collapse_graph_array(
        app, slope_2.T * (W_2_out_ga @ delta_out_ga)
    )
    delta_1_ga = opt.collapse_graph_array(
        app, slope_1.T * (W_1_2_ga @ delta_2_ga)
    )

    # Materialize deltas and hidden activations as block arrays.
    delta_out = materialize(delta_out_ga)
    delta_2 = materialize(delta_2_ga)
    delta_1 = materialize(delta_1_ga)
    hidden_1 = materialize(act_1)
    hidden_2 = materialize(act_2)

    # Parameter updates: weights first, then biases.
    W_in_1: BlockArray = update_weight(app, LR, W_in_1, delta_1, X)
    W_1_2: BlockArray = update_weight(app, LR, W_1_2, delta_2, hidden_1)
    W_2_out: BlockArray = update_weight(app, LR, W_2_out, delta_out, hidden_2)
    B_1: BlockArray = update_bias(app, LR, B_1, delta_1)
    B_2: BlockArray = update_bias(app, LR, B_2, delta_2)
    B_out: BlockArray = update_bias(app, LR, B_out, delta_out)

    updated = (W_in_1, W_1_2, W_2_out, B_1, B_2, B_out)
    for parameter in updated:
        parameter.touch()
    return updated
def one_step_fit_opt(app, X, y, W_in_1, W_1_2, W_2_out, num_gpus, verbose=False):
    """Run one forward/backward step of the three-weight sigmoid network.

    Inputs are wrapped as graph arrays, ``distribute_weights`` is invoked on
    ``app.one``, the forward and backward graphs are collapsed, and the
    computed gradients are subtracted from the weights with ``-=``.

    Args:
        app: Application object (``app.one`` and ``app.system`` are read).
        X: Input block array.
        y: Target block array.
        W_in_1, W_1_2, W_2_out: Weight block arrays of the three layers.
        num_gpus: First entry of the ``(num_gpus, 1)`` cluster shape.
        verbose: When true, print progress lines and the back-propagation
            banner.

    Returns:
        ``(initend, endtime)``: ``time.time()`` readings taken after the
        forward graph was built and after the last gradient graph was
        collapsed. The weights are touched but not returned.

    Note:
        The updates use augmented assignment; whether the caller's arrays are
        modified in place depends on BlockArray's ``-=`` support - confirm.
    """

    def _log(message):
        # Progress output is opt-in.
        if verbose:
            print(message)

    # --forward propagation--
    cluster_state = ClusterState((num_gpus, 1), app.system)
    one_ga: GraphArray = GraphArray.from_ba(app.one, cluster_state)
    X_ga = GraphArray.from_ba(X, cluster_state)
    y_ga = GraphArray.from_ba(y, cluster_state)
    W_in_1_ga = GraphArray.from_ba(W_in_1, cluster_state)
    W_1_2_ga = GraphArray.from_ba(W_1_2, cluster_state)
    W_2_out_ga = GraphArray.from_ba(W_2_out, cluster_state)

    # Distribute weights (only app.one is passed).
    distribute_weights(app.one, cluster_state)

    _log("forward Z_1_ga")
    Z_1_ga: GraphArray = forward(app, X_ga, W_in_1_ga)
    _log("forward S_1_ga")
    S_1_ga: GraphArray = opt.sigmoid(app, Z_1_ga, one_ga)
    _log("forward F_1_ga")
    F_1_ga: GraphArray = opt.sigmoid_deriv(app, Z_1_ga, one_ga)

    _log("forward Z_2_ga")
    Z_2_ga: GraphArray = forward(app, S_1_ga, W_1_2_ga)
    S_2_ga: GraphArray = opt.sigmoid(app, Z_2_ga, one_ga)
    F_2_ga: GraphArray = opt.sigmoid_deriv(app, Z_2_ga, one_ga)

    _log("forward Z_out_ga")
    Z_out_ga: GraphArray = forward(app, S_2_ga, W_2_out_ga)
    _log("forward y_predict_ga")
    y_predict_ga: GraphArray = opt.sigmoid(app, Z_out_ga, one_ga)
    _log("forward F_out_ga")
    F_out_ga: GraphArray = opt.sigmoid_deriv(app, Z_out_ga, one_ga)
    initend = time.time()

    if verbose:
        banner = (
            "-----------------------------start back propogation-------------------------------"
        )
        # The banner is printed three times.
        for _ in range(3):
            print(banner)

    # --back propagation--
    _log("collapse D_out_ga")
    D_out_ga = opt.collapse_graph_array(
        app, F_out_ga.T * (y_predict_ga - y_ga).T
    )
    _log("collapse D_2_ga")
    D_2_ga = opt.collapse_graph_array(app, F_2_ga.T * (W_2_out_ga @ D_out_ga))
    _log("collapse D_1_ga")
    D_1_ga = opt.collapse_graph_array(app, F_1_ga.T * (W_1_2_ga @ D_2_ga))
    distribute_graph_array(D_1_ga, cluster_state)

    # Weight-gradient graphs: (delta @ layer input).T
    _log("collapse_graph_array dW_in_1_ga")
    dW_in_1_ga = opt.collapse_graph_array(app, (D_1_ga @ X_ga).T)
    _log("collapse_graph_array dW_1_2_ga")
    dW_1_2_ga = opt.collapse_graph_array(app, (D_2_ga @ S_1_ga).T)
    _log("collapse_graph_array dW_2_out_ga")
    dW_2_out_ga = opt.collapse_graph_array(app, (D_out_ga @ S_2_ga).T)
    endtime = time.time()

    dW_in_1_ga_ba: BlockArray = opt.compute_graph_array(app, dW_in_1_ga)
    dW_1_2_ga_ba: BlockArray = opt.compute_graph_array(app, dW_1_2_ga)
    dW_2_out_ga_ba: BlockArray = opt.compute_graph_array(app, dW_2_out_ga)

    # The gradients are subtracted without a learning-rate factor.
    _log("update W_in_1")
    W_in_1 -= dW_in_1_ga_ba
    _log("update W_1_2")
    W_1_2 -= dW_1_2_ga_ba
    _log("update W_2_out")
    W_2_out -= dW_2_out_ga_ba

    W_in_1.touch()
    W_1_2.touch()
    W_2_out.touch()
    return initend, endtime