counter = val[1] if counter%10000 == 0: print("######### validate") socket.push(['validate', peaknet.model]) elif counter%10 == 0: print("######### validateSubset") socket.push(['validateSubset', peaknet.model]) fname = os.path.join(outdir, str(kk)+".pkl") if kk%3 == 0: torch.save(peaknet.model, fname) kk += 1 elif(val[0] == "Gradient"): # val is the gradient #Step 7: Queen recieves new model from client grads = val[1] mini_batch_size = val[2] #Step 8: Queen does updateGradient(new model from client) peaknet.set_optimizer(adagrad=True, lr=lr) peaknet.updateGrad(grads, mini_batch_size, useGPU=runMasterOnGPU) #Step 9: Queen Optimizes peaknet.optimize() #Step 10: Repeat Steps 3-10 model_dict = dict(peaknet.model.named_parameters()) #TODO: Every checkpoint # models, the model will be saved to MongoDB
### Peaknet setup ###
# Build the network from its config file and initialise the model that is
# sent back to clients in every reply below.
net = Peaknet()
net.loadCfg("/reg/neh/home/liponan/ai/pytorch-yolo2/cfg/newpeaksv10-asic.cfg")
net.init_model()
print("done model setup")
#####################

# ZeroMQ REP endpoint: each request received in the loop is answered with
# exactly one reply, as the REQ/REP pattern requires.
context = zmq.Context()
socket = context.socket(zmq.REP)
socket.bind("tcp://*:5556")

try:
    while True:
        # Wait for next request from client.
        # SECURITY NOTE(review): recv_pyobj() unpickles whatever arrives on
        # the socket, and the socket is bound to all interfaces ("tcp://*").
        # Unpickling data from an untrusted peer can execute arbitrary code;
        # only expose this port on a trusted network.
        message = socket.recv_pyobj()
        grads, delta = message  # 'message' always has two components
        print("Received request. delta:", delta)  # deliberately not printing the grads

        # delta = number of images trained in the last iteration
        if delta > 0:
            # NOTE(review): the optimizer is (re)set on every update, exactly
            # as before. If set_optimizer() builds a fresh optimizer object,
            # any accumulated Adagrad state is discarded each step -- confirm
            # this is intended.
            net.set_optimizer(adagrad=True)
            net.updateGrad(grads=grads, delta=delta, useGPU=False)
            net.optimize()
        print("imgs seen:", net.model.seen)

        # n_validate is not defined in this section; presumably it is set
        # earlier in the file -- verify.
        # The reply tag tells the client what to do next with the model.
        if net.model.seen % n_validate == 0 and net.model.seen > 0:
            socket.send_pyobj(["validate", net.model])
        else:
            socket.send_pyobj(["train", net.model])
finally:
    # Release the ZeroMQ resources when the loop ends (error or Ctrl-C).
    # linger=0 drops unsent replies so that term() cannot block on shutdown.
    socket.close(linger=0)
    context.term()