def main():
    global transmission_time, sent_goodput, received_goodput, temp_trans_fog
    cfgs = [192, 160, 96, 'M', 192, 192, 192, 'A', 192, 192, 12, 'A']
    kernel_filters = [5, 1, 1, 3, 5, 1, 1, 3, 3, 1, 1, 8]
    stride = [1, 1, 1, 2, 1, 1, 1, 2, 1, 1, 1, 1]
    padding = [2, 0, 0, 1, 2, 0, 0, 1, 1, 0, 0, 0]
    model = torch.load("/home/arnab/Desktop/Data/nin.pt", map_location=torch.device('cpu'))
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16", 4: "NiN"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16 4: NiN]: ")
    pretrained_model = models[int(m)]
    MAX_MESSAGE_LENGTH = 104857600  # 100 MB

    # load the model key list and the per-layer lookup table
    model_list = create_model_list(model)
    model_dict = create_model_dict(cfgs, model_list, kernel_filters, stride, padding)

    try:
        # write the log in CSV format
        csv_file = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_nin_cpu_match_part.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None

        # swap-matching algorithm: pair the user with its best fog node
        matching = match.Matching()
        matched_fog_node_CA_idx = matching.F_CA.index(max(matching.F_CA))
        print(f"matched_fog_node_CA_idx: {matched_fog_node_CA_idx}")
        matching.DNN_inference_offloading_swap_matching()
        matched_fog_node = matching.rand_match[0][1]  # one user matched with its best fog node
        matched_fog_node = matched_fog_node + ":50051"

        # create a channel and a stub (client)
        channel = grpc.insecure_channel(
            matched_fog_node,
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        stub = message_pb2_grpc.CalculatorStub(channel)

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model, CA=matching.F_CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        reset(worker)  # reset transmission_latency, computation_latency, transmission_data_size
        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            for i in range(len(cfgs)):
                current_layer = i
                input_ = img if i == 0 else final_out
                channel = input_.shape[1]
                after_part = userObject.partition_algorithm("conv", input_, f=kernel_filters[current_layer])
                for j in range(len(after_part)):
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    if j == matched_fog_node_CA_idx:
                        # offload this partition to the matched fog node
                        msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                           current_layer, epoch, pretrained_model, 0)
                        out = message_pb2.Request(message=msg)
                        r = stub.Node(out)
                        img_part = received_message(r.message, current_layer, j)
                        img_part_list.append(img_part)
                    else:
                        # compute this partition locally
                        s = time.time()
                        img_part = nin.nn_layer('conv', img_part, current_layer, model, 0, model_dict)
                        e = time.time()
                        layer_comp_time = (e - s) * 1000
                        img_part_list.append(img_part)
                        received_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                        total_data = (sent_goodput[j][current_layer] + received_goodput[j][current_layer]) / (1024 * 1024)
                        # log the sent payload (MB) and the local computation time for this layer
                        set_value(current_layer, 0, sent_goodput[j][current_layer] / (1024 * 1024), layer_comp_time)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time / worker
                        print(f"total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling followed by a binarized activation
            avgpool = nn.AdaptiveAvgPool2d((1, 1))
            final_out = avgpool(final_out)
            final_out = torch.flatten(final_out, 1)
            m = nn.ReLU()
            final_out = m(final_out).data > 0
            final_out = final_out.int()
            print(f"Final Result: {final_out}")
            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                write_to_file(epoch, writer, worker)
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, sent_goodput, received_goodput, temp_trans_fog
    cfgs = [192, 160, 96, 'M', 192, 192, 192, 'A', 192, 192, 12, 'A']
    kernel_filters = [5, 1, 1, 3, 5, 1, 1, 3, 3, 1, 1, 8]
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16", 4: "NiN"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16 4: NiN]: ")
    pretrained_model = models[int(m)]
    MAX_MESSAGE_LENGTH = 104857600  # 100 MB
    try:
        # write the log in CSV format
        csv_file = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_nin_cpu_random_part.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None

        # create a channel and a stub (client) for each fog node
        channel1 = grpc.insecure_channel(
            '192.168.0.106:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        channel2 = grpc.insecure_channel(
            '192.168.0.107:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        stub1 = message_pb2_grpc.CalculatorStub(channel1)
        stub2 = message_pb2_grpc.CalculatorStub(channel2)

        # randomize the fog nodes' computing capability (CA), then load the dataset
        CA = [float(x) for x in np.random.randint(1, 10, size=2)]  # temporary CA; size should equal the number of workers
        print(f"CA: {CA}")
        userObject = user.User(pretrained_model=pretrained_model, CA=CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        reset(worker)  # reset transmission_latency, computation_latency, transmission_data_size
        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            for i in range(len(cfgs)):
                current_layer = i
                input_ = img if i == 0 else final_out
                channel = input_.shape[1]
                after_part = userObject.partition_algorithm("conv", input_, f=kernel_filters[current_layer])
                for j in range(len(after_part)):
                    print(f"Conv: partition: {after_part[j]}")
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, 0)
                    out = message_pb2.Request(message=msg)
                    if j == 0:
                        r = stub1.Node(out)
                    elif j == 1:
                        r = stub2.Node(out)
                    img_part = received_message(r.message, current_layer, j)
                    img_part_list.append(img_part)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time / worker
                        print(f"total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling followed by a binarized activation
            avgpool = nn.AdaptiveAvgPool2d((1, 1))
            final_out = avgpool(final_out)
            final_out = torch.flatten(final_out, 1)
            m = nn.ReLU()
            final_out = m(final_out).data > 0
            final_out = final_out.int()
            print(f"Final Result: {final_out}")
            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                write_to_file(epoch, writer, worker)
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, sent_goodput, received_goodput
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16", 4: "NiN"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16 4: NiN]: ")
    pretrained_model = models[int(m)]
    MAX_MESSAGE_LENGTH = 104857600  # 100 MB
    try:
        # write the log in CSV format
        csv_file = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_vgg16_cpu_greedy_part.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None
        num_layers = 19  # 13 conv layers, 5 maxpool layers, and 1 sequential FC block

        # greedy matching algorithm
        matching = match.Matching()
        greedy_random_match = matching.greedy_random_match()
        print(f"Greedy Random Match: {greedy_random_match}")
        matched_fog_node = greedy_random_match[0][1]  # one user matched with its best fog node
        matched_fog_node = matched_fog_node + ":50051"

        # create a channel and a stub (client)
        channel = grpc.insecure_channel(
            matched_fog_node,
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        stub = message_pb2_grpc.CalculatorStub(channel)

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model, CA=matching.F_CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        reset()  # reset transmission_latency, computation_latency, transmission_data_size
        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            # one user matches with one fog node (NO PARTITION)
            for layer in range(num_layers):
                current_layer = layer
                input_ = img if current_layer == 0 else final_out
                channel = input_.shape[1]
                if current_layer < 18:
                    sent_goodput[current_layer] = input_.element_size() * input_.nelement()  # goodput
                    msg = send_message("conv", input_, 0, BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, 0)
                    out = message_pb2.Request(message=msg)
                    r = stub.Node(out)
                    img_part = received_message(r.message, current_layer)
                    final_out = img_part
                    total_transmission_time += transmission_time
                    transmission_time = 0
                else:
                    # adaptive average pooling before the fully connected block
                    avgpool = nn.AdaptiveAvgPool2d((7, 7))
                    out = avgpool(final_out)
                    input_ = out
                    channel = input_.shape[1]
                    sent_goodput[current_layer] = input_.element_size() * input_.nelement()  # goodput
                    msg = send_message("ff", input_, 0, BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, prev_input_units)
                    out = message_pb2.Request(message=msg)
                    r = stub.Node(out)
                    img_part = received_message(r.message, current_layer)
                    final_out = img_part
                    total_transmission_time += transmission_time

            print(f"Final_out: {final_out}")
            print(f"total_transmission_time: {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                write_to_file(epoch, writer, 1)
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, transmission_latency, computation_latency, transmission_data_size, \
        temp_trans, temp_layer_comp, temp_total_data, sent_goodput, received_goodput
    model = torch.load("/home/arnab/Desktop/Data/alexnet.pt", map_location=torch.device('cpu'))
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16", 4: "NiN"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16 4: NiN]: ")
    pretrained_model = models[int(m)]
    try:
        # write the log in CSV format
        csv_file = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_alexnet_cpu_match_part.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None
        num_layers = 6

        # swap-matching algorithm: pair the user with its best fog node
        matching = match.Matching()
        matched_fog_node_CA_idx = matching.F_CA.index(max(matching.F_CA))
        print(f"matched_fog_node_CA_idx: {matched_fog_node_CA_idx}")
        matching.DNN_inference_offloading_swap_matching()
        matched_fog_node = matching.rand_match[0][1]  # one user matched with its best fog node
        matched_fog_node = matched_fog_node + ":50051"

        # create a channel and a stub (client)
        channel1 = grpc.insecure_channel(matched_fog_node)
        stub1 = message_pb2_grpc.CalculatorStub(channel1)

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model, CA=matching.F_CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        reset(worker)  # reset transmission_latency, computation_latency, transmission_data_size
        epoch = 0
        for img, level in loader:
            prev_input_units = 0
            print(f"Epoch: {epoch}")
            # one user matches with one fog node
            # (PARTITION: one part on the user, the other on the fog node)
            for i, k in enumerate(userObject.kernel_filters):
                current_layer = i
                input_ = img if i == 0 else final_out
                channel = input_.shape[1]
                after_part = userObject.partition_algorithm("conv", input_, f=k)
                for j in range(len(after_part)):
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    if j == matched_fog_node_CA_idx:
                        # offload this partition to the matched fog node
                        msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                           current_layer, epoch, pretrained_model, 0)
                        out = message_pb2.Request(message=msg)
                        r = stub1.Node(out)
                        img_part = received_message(r.message, current_layer, j)  # goodput
                        img_part_list.append(img_part)
                    else:
                        # compute this partition locally
                        s = time.time()
                        img_part = alexnet.nn_layer("conv", img_part, current_layer, model, 0)
                        e = time.time()
                        layer_comp_time = (e - s) * 1000
                        img_part_list.append(img_part)
                        received_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                        total_data = (sent_goodput[j][current_layer] + received_goodput[j][current_layer]) / (1024 * 1024)
                        set_value(current_layer, 0, total_data, layer_comp_time)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time
                        print(f"Current Conv Layer: {current_layer} total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling before the fully connected layers
            avgpool = nn.AdaptiveAvgPool2d((6, 6))
            out = avgpool(final_out)
            input_ = out
            channel = input_.shape[1]
            current_layer += 1
            after_part = userObject.partition_algorithm("ff", input_, f=0)
            for j in range(len(after_part)):
                img_part = adaptive_partitioning(input_, after_part[j])
                flat_img = torch.flatten(img_part, 1)
                sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                if j == matched_fog_node_CA_idx:
                    # offload this partition to the matched fog node
                    msg = send_message("ff", img_part, after_part[j], BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, prev_input_units)
                    prev_input_units = flat_img.shape[1]
                    out = message_pb2.Request(message=msg)
                    r = stub1.Node(out)
                    img_part = received_message(r.message, current_layer, j)  # goodput
                    classify_list.append(img_part)
                else:
                    # compute this partition locally and binarize the activation
                    s = time.time()
                    img_part = alexnet.nn_layer("ff", img_part, current_layer, model, prev_input_units)
                    e = time.time()
                    layer_comp_time = (e - s) * 1000
                    prev_input_units = flat_img.shape[1]
                    m = nn.ReLU()
                    out = m(img_part).data > 0
                    out = out.int()
                    classify_list.append(out)
                    received_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    total_data = (sent_goodput[j][current_layer] + received_goodput[j][current_layer]) / (1024 * 1024)
                    set_value(current_layer, 0, total_data, layer_comp_time)
                if len(classify_list) == worker:
                    # merge the binarized partial classifications with bitwise OR
                    classify_final = None
                    for i in range(len(classify_list) - 1):
                        if i == 0:
                            classify_final = np.bitwise_or(classify_list[i].numpy()[:], classify_list[i + 1].numpy()[:])
                        else:
                            classify_final = np.bitwise_or(classify_final, classify_list[i + 1].numpy()[:])
                    classify_list = []
                    prev_input_units = 0  # for the FC layer
                    total_transmission_time += transmission_time
                    print(f"total_transmission_time: {total_transmission_time}")

            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                write_to_file(epoch, writer, worker)
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, sent_goodput, received_goodput, temp_trans_fog
    cfgs = [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M']
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16", 4: "NiN"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16 4: NiN]: ")
    pretrained_model = models[int(m)]
    CA = [4.0, 6.0]  # capability (temporary)
    MAX_MESSAGE_LENGTH = 104857600  # 100 MB
    try:
        # write the log in CSV format
        csv_file = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_vgg16_cpu_random_part.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None

        # create a channel and a stub (client) for each fog node
        channel1 = grpc.insecure_channel(
            '192.168.0.106:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        channel2 = grpc.insecure_channel(
            '192.168.0.107:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        stub1 = message_pb2_grpc.CalculatorStub(channel1)
        stub2 = message_pb2_grpc.CalculatorStub(channel2)

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model, CA=CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        reset(worker)  # reset transmission_latency, computation_latency, transmission_data_size
        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            for i in range(len(cfgs)):
                current_layer = i
                print(f"Current: {current_layer}")
                input_ = img if i == 0 else final_out
                channel = input_.shape[1]
                after_part = userObject.random_partition(input_, userObject.kernel_filters)
                for j in range(len(after_part)):
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, 0)
                    out = message_pb2.Request(message=msg)
                    if j == 0:
                        r = stub1.Node(out)
                    elif j == 1:
                        r = stub2.Node(out)
                    img_part = received_message(r.message, current_layer, j)
                    img_part_list.append(img_part)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time / worker
                        print(f"Current Conv Layer: {current_layer} total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling before the fully connected layers
            avgpool = nn.AdaptiveAvgPool2d((7, 7))
            out = avgpool(final_out)
            input_ = out
            channel = input_.shape[1]
            current_layer += 1
            after_part = userObject.random_partition(input_, 1)
            for j in range(len(after_part)):
                img_part = adaptive_partitioning(input_, after_part[j])
                flat_img = torch.flatten(img_part, 1)
                sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                msg = send_message("ff", img_part, after_part[j], BATCH_SIZE,
                                   current_layer, epoch, pretrained_model, prev_input_units)
                prev_input_units = flat_img.shape[1]
                out = message_pb2.Request(message=msg)
                if j == 0:
                    r = stub1.Node(out)
                elif j == 1:
                    r = stub2.Node(out)
                img_part = received_message(r.message, current_layer, j)
                classify_list.append(img_part)
                if len(classify_list) == worker:
                    # merge the binarized partial classifications with bitwise OR
                    classify_final = None
                    for i in range(len(classify_list) - 1):
                        if i == 0:
                            classify_final = np.bitwise_or(classify_list[i].numpy()[:], classify_list[i + 1].numpy()[:])
                        else:
                            classify_final = np.bitwise_or(classify_final, classify_list[i + 1].numpy()[:])
                    classify_list = []
                    prev_input_units = 0  # for the FC layer
                    total_transmission_time += transmission_time / worker
                    print(f"total_transmission_time: {total_transmission_time}")

            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                write_to_file(epoch, writer, worker)
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, sent_goodput, received_goodput, temp_trans_fog
    cfgs = [(64, 6), (64, 12), (64, 12), (64, 12), (128, 18, 2), (128, 12), (128, 12), (128, 12),
            (256, 18, 2), (256, 12), (256, 12), (256, 12), (256, 12), (256, 12),
            (512, 18, 2), (512, 12), (512, 12)]
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16]: ")
    pretrained_model = models[int(m)]
    CA = [4.0, 6.0]  # capability (temporary)
    MAX_MESSAGE_LENGTH = 104857600  # 100 MB
    # open a gRPC channel
    try:
        # write the logs in CSV format (one file per fog node)
        csv_file1 = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_resnet_cpu_node_1.csv', 'a')
        csv_file2 = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_resnet_cpu_node_2.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer1 = csv.DictWriter(csv_file1, fieldnames=fieldnames)
        writer1.writeheader()
        writer2 = csv.DictWriter(csv_file2, fieldnames=fieldnames)
        writer2.writeheader()

        channel1 = grpc.insecure_channel(
            '192.168.0.106:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        channel2 = grpc.insecure_channel(
            '192.168.0.107:50051',
            options=[
                ('grpc.max_send_message_length', MAX_MESSAGE_LENGTH),
                ('grpc.max_receive_message_length', MAX_MESSAGE_LENGTH),
            ],
        )
        # create stubs (clients)
        stub1 = message_pb2_grpc.CalculatorStub(channel1)
        stub2 = message_pb2_grpc.CalculatorStub(channel2)

        # initialize variables
        current_layer = None
        kernel_filters = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model, CA=CA)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            logging.info(f"Epoch: {epoch}\n")
            for i in range(len(cfgs)):
                current_layer = i
                print(f"Current Conv Layer: {current_layer}\n")
                if i == 0:
                    input_ = img
                    kernel_filters = 7
                else:
                    input_ = final_out
                    kernel_filters = 3
                channel = input_.shape[1]
                after_part = userObject.partition_algorithm("conv", input_, f=kernel_filters)
                for j in range(len(after_part)):
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, 0)
                    out = message_pb2.Request(message=msg)
                    if j == 0:
                        r = stub1.Node(out)
                        img_part = received_message(r.message, current_layer, j, writer1)  # goodput
                    elif j == 1:
                        r = stub2.Node(out)
                        img_part = received_message(r.message, current_layer, j, writer2)  # goodput
                    img_part_list.append(img_part)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time / worker
                        print(f"total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling before the FC layer
            avgpool = nn.AdaptiveAvgPool2d((1, 1))
            out = avgpool(final_out)
            input_ = out
            channel = input_.shape[1]
            current_layer += 1
            print(f"\nCurrent FC layer: {current_layer}")
            # the FC input is sent whole to every node (no partitioning for the FC layer)
            for j in range(worker):
                img_part = input_
                sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                msg = send_message("ff", img_part, 0, BATCH_SIZE,
                                   current_layer, epoch, pretrained_model, 0)
                out = message_pb2.Request(message=msg)
                if j == 0:
                    r = stub1.Node(out)
                    img_part = received_message(r.message, current_layer, j, writer1)  # goodput
                elif j == 1:
                    r = stub2.Node(out)
                    img_part = received_message(r.message, current_layer, j, writer2)  # goodput
                classify_list.append(img_part)
                if len(classify_list) == worker:
                    # merge the partial classifications with bitwise OR
                    classify_final = None
                    for i in range(len(classify_list) - 1):
                        if i == 0:
                            classify_final = np.bitwise_or(classify_list[i].numpy()[:], classify_list[i + 1].numpy()[:])
                        else:
                            classify_final = np.bitwise_or(classify_final, classify_list[i + 1].numpy()[:])
                    classify_list = []
                    total_transmission_time += transmission_time / worker
                    print(f"total_transmission_time: {total_transmission_time}")

            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def main():
    global transmission_time, total_computation_time
    models = {1: "AlexNet", 2: "ResNet34", 3: "VGG16"}
    m = input("Enter Model Number [1: AlexNet 2: ResNet34 3: VGG16]: ")
    pretrained_model = models[int(m)]
    # open a gRPC channel
    try:
        # write the logs in CSV format (one file per fog node)
        csv_file1 = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_alexnet_cpu_node_1.csv', 'a')
        csv_file2 = open('/home/arnab/Desktop/DNN/gRPC/logs/gRPC_time_state_alexnet_cpu_node_2.csv', 'a')
        fieldnames = ['layer', 'transmission_time', 'transmission_data', 'computation_time']
        writer1 = csv.DictWriter(csv_file1, fieldnames=fieldnames)
        writer1.writeheader()
        writer2 = csv.DictWriter(csv_file2, fieldnames=fieldnames)
        writer2.writeheader()

        channel1 = grpc.insecure_channel('192.168.0.106:50051')
        channel2 = grpc.insecure_channel('192.168.0.107:50051')
        # create stubs (clients)
        stub1 = message_pb2_grpc.CalculatorStub(channel1)
        stub2 = message_pb2_grpc.CalculatorStub(channel2)

        # initialize variables
        current_layer = None
        img_part_list = []
        classify_list = []
        prev_input_units = 0
        total_transmission_time = 0
        final_out = None

        # load the dataset
        userObject = user.User(pretrained_model=pretrained_model)
        BATCH_SIZE = userObject.BATCH_SIZE
        worker = userObject.worker
        loader = userObject.image_loader()

        epoch = 0
        for img, level in loader:
            print(f"Epoch: {epoch}")
            for i, k in enumerate(userObject.kernel_filters):
                current_layer = i
                input_ = img if i == 0 else final_out
                channel = input_.shape[1]
                after_part = userObject.partition_algorithm("conv", input_, f=k)
                for j in range(len(after_part)):
                    img_part = adaptive_partitioning(input_, after_part[j])  # partitioned image
                    sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                    msg = send_message("conv", img_part, after_part[j], BATCH_SIZE,
                                       current_layer, epoch, pretrained_model, 0)
                    out = message_pb2.Request(message=msg)
                    if j == 0:
                        r = stub1.Node(out)
                        img_part = received_message(r.message, current_layer, j, writer1)  # goodput
                    elif j == 1:
                        r = stub2.Node(out)
                        img_part = received_message(r.message, current_layer, j, writer2)  # goodput
                    img_part_list.append(img_part)
                    if len(img_part_list) == 1:
                        final_out = img_part
                    else:
                        final_out = torch.cat((final_out, img_part), 2)
                    if len(img_part_list) == worker:
                        img_part_list = []
                        print("\tAfter Merge: " + str(final_out.size()))
                        total_transmission_time += transmission_time / worker
                        print(f"total_transmission_time: {total_transmission_time}")
                        transmission_time = 0

            # adaptive average pooling before the fully connected layers
            avgpool = nn.AdaptiveAvgPool2d((6, 6))
            out = avgpool(final_out)
            input_ = out
            channel = input_.shape[1]
            current_layer += 1
            after_part = userObject.partition_algorithm("ff", input_, f=0)
            for j in range(len(after_part)):
                img_part = adaptive_partitioning(input_, after_part[j])
                flat_img = torch.flatten(img_part, 1)
                sent_goodput[j][current_layer] = img_part.element_size() * img_part.nelement()  # goodput
                msg = send_message("ff", img_part, after_part[j], BATCH_SIZE,
                                   current_layer, epoch, pretrained_model, prev_input_units)
                prev_input_units = flat_img.shape[1]
                out = message_pb2.Request(message=msg)
                if j == 0:
                    r = stub1.Node(out)
                    img_part = received_message(r.message, current_layer, j, writer1)  # goodput
                elif j == 1:
                    r = stub2.Node(out)
                    img_part = received_message(r.message, current_layer, j, writer2)  # goodput
                classify_list.append(img_part)
                if len(classify_list) == worker:
                    # merge the partial classifications with bitwise OR
                    classify_final = None
                    for i in range(len(classify_list) - 1):
                        if i == 0:
                            classify_final = np.bitwise_or(classify_list[i].numpy()[:],
                                                           classify_list[i + 1].numpy()[:])
                        else:
                            classify_final = np.bitwise_or(classify_final,
                                                           classify_list[i + 1].numpy()[:])
                    classify_list = []
                    total_transmission_time += transmission_time / worker
                    print(f"total_transmission_time: {total_transmission_time}")

            logging.info(f"Epoch: {epoch}")
            logging.info(f"Computation time (one node): {total_computation_time / worker}")
            print(f"Computation time (one node): {total_computation_time / worker}")
            logging.info(f"Transmission time (one node): {total_transmission_time}\n")
            print(f"Transmission time (one node): {total_transmission_time}")
            transmission_time = 0
            total_computation_time = 0
            total_transmission_time = 0
            epoch += 1
            if epoch == 1:
                break
    except Exception as e:
        print("main:ERROR")
        print(e)
def __init__(self, grpc_addr, redis_addr):
    # gRPC channel and stub (client)
    self._chan = grpc.insecure_channel(grpc_addr)
    self._stub = message_pb2_grpc.CalculatorStub(self._chan)
    # Redis connection, parsed from a "host:port" address
    redis_host = redis_addr.split(":")[0]
    redis_port = int(redis_addr.split(":")[1])
    self._redis = redis.Redis(host=redis_host, port=redis_port, db=0)
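# Usage sketch for the constructor above. The enclosing class is not shown in
# this excerpt, so `Client` below is a placeholder name and both addresses are
# illustrative; each endpoint is a plain "host:port" string.
#
#   client = Client(grpc_addr="192.168.0.106:50051",
#                   redis_addr="192.168.0.106:6379")
#   client._stub.Node(message_pb2.Request(message=msg))  # same RPC as in main()
#   client._redis.set("state", b"...")                   # Redis handle is ready to use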