Exemple #1
0
        counter = val[1]

        if counter%10000 == 0:
            print("######### validate")
            socket.push(['validate', peaknet.model])
        elif counter%10 == 0:
            print("######### validateSubset")
            socket.push(['validateSubset', peaknet.model])

        fname = os.path.join(outdir, str(kk)+".pkl")
        if kk%3 == 0:
            torch.save(peaknet.model, fname)
        kk += 1

    elif(val[0] == "Gradient"): # val is the gradient
        #Step 7: Queen receives new model from client
        grads = val[1]
        mini_batch_size = val[2]
        #Step 8: Queen does updateGradient(new model from client)
        peaknet.set_optimizer(adagrad=True, lr=lr)

        peaknet.updateGrad(grads, mini_batch_size, useGPU=runMasterOnGPU)
        #Step 9: Queen Optimizes
        peaknet.optimize()

        #Step 10: Repeat Steps 3-10
        model_dict = dict(peaknet.model.named_parameters())
        #TODO: Every checkpoint # models, the model will be saved to MongoDB


### Peaknet setup ###

# Build the network owned by this server process: load the layer
# configuration, then instantiate the model that is sent back to the client
# in every reply.
net = Peaknet()
net.loadCfg("/reg/neh/home/liponan/ai/pytorch-yolo2/cfg/newpeaksv10-asic.cfg")
net.init_model()
print("done model setup")

#####################

# Reply socket: each request received below is answered with exactly one
# send_pyobj() before the next recv_pyobj().
context = zmq.Context()
socket = context.socket(zmq.REP)
socket.bind("tcp://*:5556")

try:
    while True:
        # Wait for the next request from a client.
        # NOTE(security): recv_pyobj() unpickles whatever arrives on the
        # socket; only expose this port to trusted clients.
        message = socket.recv_pyobj()
        grads, delta = message  # 'message' always has two components
        # Only delta is printed; the grads are deliberately left out.
        print("Received request. delta:", delta)

        # delta = number of images trained in the last iteration
        if delta > 0:
            # NOTE(review): set_optimizer() runs on every update; confirm it
            # does not discard optimizer state accumulated by earlier updates.
            net.set_optimizer(adagrad=True)
            net.updateGrad(grads=grads, delta=delta, useGPU=False)
            net.optimize()

        seen = net.model.seen
        print("imgs seen:", seen)

        # Reply with the current model, tagged with what the client should do
        # next. n_validate is expected to be defined elsewhere in the file.
        if seen % n_validate == 0 and seen > 0:
            socket.send_pyobj(["validate", net.model])
        else:
            socket.send_pyobj(["train", net.model])
finally:
    # Release the ZMQ resources when the loop is left through an exception
    # or Ctrl-C; linger=0 drops any unsent reply so term() cannot block.
    socket.close(linger=0)
    context.term()