# Symbolic 4-D placeholder for the gradient of the validation objective
# w.r.t. the (elementary) weights.
grad_valid_weight = T.tensor4('grad_valid_weight')

# load_dataset returns already-normalized splits.
X_elementary, Y_elementary, X_test, Y_test = load_dataset(args)
# Carve a large validation ("hyper") set off the front of the elementary
# set — as in the CPU experiments — to avoid overfitting hyperparameters.
X_hyper, Y_hyper = X_elementary[:5000], Y_elementary[:5000]
X_elementary, Y_elementary = X_elementary[5000:], Y_elementary[5000:]


#TODO: seeds for dropout, reinitialize BN layers
model = DenseNet(x=x, y=y, args=args)

# One zero-initialized momentum ("velocity") buffer per elementary
# parameter, matching its shape, floatX dtype and broadcast pattern.
velocities = []
for param in model.params_theta:
    zeros = np.zeros_like(param.get_value(borrow=True),
                          dtype=theano.config.floatX)
    velocities.append(theano.shared(zeros,
                                    broadcastable=param.broadcastable,
                                    name=param.name + '_vel'))
momLlr = args.momLlr









#make a directory with a timestamp name, and save results to it
import os

# Elementary-step learning rate as a host-side scalar in Theano's floatX.
lr_ele_true = np.array(args.lrEle, theano.config.floatX)
mom = args.momEle  #momentum for the elementary updates
# Symbolic hyper-step learning rate and validation-gradient placeholder.
lr_hyper = T.fscalar('lr_hyper')
grad_valid_weight = T.tensor4('grad_valid_weight')

# load_dataset returns already-normalized splits.
X_elementary, Y_elementary, X_test, Y_test = load_dataset(args)
# Use a large validation set (as in the CPU experiments) so the
# hyperparameters are not overfit to a tiny holdout.
X_hyper, Y_hyper = X_elementary[:5000], Y_elementary[:5000]
X_elementary, Y_elementary = X_elementary[5000:], Y_elementary[5000:]

#TODO: seeds for dropout, reinitialize BN layers
# Seed with a distinct value so weight initialization draws its own stream.
lasagne.random.get_rng().seed(args.seed + 1)
model = DenseNet(x=x, y=y, args=args)
# Reset the seed: weight initialization consumed randomness, too.
lasagne.random.get_rng().seed(args.seed)
# Disabled: re-initialize dropout layers after the seed reset.
# for layer in model.layers:
#     if 'Dropout' in str(layer):
#         layer.reinit()

# One zero-initialized momentum ("velocity") buffer per elementary
# parameter, matching its shape, floatX dtype and broadcast pattern.
velocities = []
for param in model.params_theta:
    zeros = np.zeros_like(param.get_value(borrow=True),
                          dtype=theano.config.floatX)
    velocities.append(theano.shared(zeros,
                                    broadcastable=param.broadcastable,
                                    name=param.name + '_vel'))
momLlr = args.momLlr