from pymanopt.manifolds import Stiefel  # assumed source of Stiefel: any pymanopt-style class exposing egrad2rgrad() and retr()


def accumulate_gradients(opts, lr, batchSize, net, res, mmap=None):
    """Apply one SGD step per layer, walking the network backwards.

    'bfc' layers are updated with a Riemannian gradient step on the Stiefel
    manifold; all other layers get a plain Euclidean SGD step. Gradients are
    averaged over the mini-batch.
    """
    for l in range(len(net['layers']) - 1, -1, -1):
        if res['dzdw'][l] is None:
            continue

        # Default per-layer hyper-parameters.
        net['layers'][l].setdefault('learningRate', 1)
        net['layers'][l].setdefault('weightDecay', 1)

        thisLR = lr * net['layers'][l]['learningRate']

        if 'weight' not in net['layers'][l]:
            continue

        if net['layers'][l]['type'] == 'bfc':
            # Riemannian SGD: convert the Euclidean gradient to a Riemannian
            # one on the Stiefel manifold, then retract back onto the manifold.
            W1 = net['layers'][l]['weight']
            W1grad = (1. / batchSize) * res['dzdw'][l]
            manifold = Stiefel(W1.shape[0], W1.shape[1])
            W1Rgrad = manifold.egrad2rgrad(W1, W1grad)
            net['layers'][l]['weight'] = manifold.retr(W1, -thisLR * W1Rgrad)
        else:
            # Plain Euclidean SGD step with batch-averaged gradient.
            net['layers'][l]['weight'] = (
                net['layers'][l]['weight']
                - thisLR * (1. / batchSize) * res['dzdw'][l])

    return net, res
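

# Hypothetical usage sketch (not from the original repo; layer contents,
# shapes, and hyper-parameters are illustrative): update a toy network with a
# single 'bfc' layer after a forward/backward pass has filled res['dzdw'].
if __name__ == '__main__':
    import numpy as np

    # An orthonormal 10x4 matrix is a valid point on the Stiefel manifold.
    W0 = np.linalg.qr(np.random.randn(10, 4))[0]
    toy_net = {'layers': [{'type': 'bfc', 'weight': W0}]}
    toy_res = {'dzdw': [np.random.randn(10, 4)]}  # stand-in Euclidean gradient

    toy_net, toy_res = accumulate_gradients(
        opts={}, lr=0.01, batchSize=32, net=toy_net, res=toy_res)

    # The updated weight should remain (numerically) orthonormal.
    W1 = toy_net['layers'][0]['weight']
    print(np.allclose(W1.T @ W1, np.eye(4), atol=1e-6))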