def build_nn(nn_input):
    d1 = nnbuilder.denseLayer(nn_input, 100, w_init=nn.XavierNormal())
    drop = nn.dropout(d1, 0.5)
    d2 = nnbuilder.denseLayer(drop, 50, w_init=nn.XavierNormal())
    drop2 = nn.dropout(d2, 0.5)
    d3 = nnbuilder.denseLayer(drop2, 25, w_init=nn.XavierNormal())
    drop3 = nn.dropout(d3, 0.5)
    d4 = nnbuilder.denseLayer(drop3, 2, activation=nn.softmax)  # renamed from d3 to avoid shadowing the layer above
    return d4
def dense_model(X, w_h, w_h2, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))
    h = nn.dropout(h, p_drop_hidden)
    h2 = nn.rectify(cgt.dot(h, w_h2))
    h2 = nn.dropout(h2, p_drop_hidden)
    py_x = nn.softmax(cgt.dot(h2, w_o))
    return py_x
def dense_model3(X, w_h, w_h2, w_h3, w_o, p_drop_input, p_drop_hidden):
    X = nn.dropout(X, p_drop_input)
    h = nn.rectify(cgt.dot(X, w_h))
    h = nn.dropout(h, p_drop_hidden[0])
    h2 = nn.rectify(cgt.dot(h, w_h2))
    h2 = nn.dropout(h2, p_drop_hidden[1])
    h3 = nn.rectify(cgt.dot(h2, w_h3))
    h3 = nn.dropout(h3, p_drop_hidden[2])
    py_x = nn.softmax(cgt.dot(h3, w_o))
    return py_x
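# A minimal wiring sketch for dense_model, not taken from the original source:
# the input/output sizes, the init_weights helper, and the learning rate are
# assumptions. It sticks to API calls that already appear in this file
# (cgt.shared, cgt.matrix/vector with fixed_shape, arange indexing, cgt.grad,
# cgt.function); the updates= keyword is assumed to behave as in Theano.
import numpy as np
import cgt
from cgt import nn

def init_weights(*shape):
    # Small-Gaussian initialization wrapped in a shared variable (assumed helper).
    return cgt.shared(np.random.randn(*shape) * 0.01)

X = cgt.matrix("X", fixed_shape=(None, 784))
y = cgt.vector("y", dtype='i8')

w_h, w_h2, w_o = init_weights(784, 256), init_weights(256, 256), init_weights(256, 10)
py_x = dense_model(X, w_h, w_h2, w_o, p_drop_input=0.2, p_drop_hidden=0.5)

# Negative log-likelihood of the labels, using the same arange-indexing trick
# as the policy constructor further down.
cost = -cgt.log(py_x)[cgt.arange(cgt.size(X, 0)), y].mean()
params = [w_h, w_h2, w_o]  # gradients are taken directly w.r.t. the shared weights
updates = [(p, p - 1e-2 * g) for p, g in zip(params, cgt.grad(cost, params))]
train = cgt.function([X, y], cost, updates=updates)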
def tinyconv_model(X, w, w2, p_drop):
    l1 = nn.conv2d(X, w, kernelshape=(3, 3), pad=(1, 1), stride=(3, 3))
    l1a = nn.dropout(l1, p_drop)
    batchsize, channels, rows, cols = l1a.shape
    # Flatten the dropped-out activations; the original reshaped l1, leaving l1a unused.
    l1flat = cgt.reshape(l1a, [batchsize, channels * rows * cols])
    pyx = nn.softmax(l1flat.dot(w2))
    return l1, pyx
def convnet_model(X, w, w2, w3, w4, w_o, p_drop_conv, p_drop_hidden):
    l1a = nn.rectify(nn.conv2d(X, w, kernelshape=(3, 3), pad=(1, 1)))
    l1 = nn.max_pool_2d(l1a, kernelshape=(2, 2), stride=(2, 2))
    l1 = nn.dropout(l1, p_drop_conv)
    l2a = nn.rectify(nn.conv2d(l1, w2, kernelshape=(3, 3), pad=(1, 1)))
    l2 = nn.max_pool_2d(l2a, kernelshape=(2, 2), stride=(2, 2))
    l2 = nn.dropout(l2, p_drop_conv)
    l3a = nn.rectify(nn.conv2d(l2, w3, kernelshape=(3, 3), pad=(1, 1)))
    l3b = nn.max_pool_2d(l3a, kernelshape=(2, 2), stride=(2, 2))
    batchsize, channels, rows, cols = l3b.shape
    l3 = cgt.reshape(l3b, [batchsize, channels * rows * cols])
    l3 = nn.dropout(l3, p_drop_conv)
    l4 = nn.rectify(cgt.dot(l3, w4))
    l4 = nn.dropout(l4, p_drop_hidden)
    pyx = nn.softmax(cgt.dot(l4, w_o))
    return pyx
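# A similar sketch for convnet_model, again an assumption rather than original
# driver code. Filter shapes follow the usual (out_channels, in_channels, rows,
# cols) layout, cgt.tensor4 is assumed to exist for 4-D image batches, and
# init_weights is the helper sketched after dense_model3 above.
X_img = cgt.tensor4("X_img", fixed_shape=(None, 1, 28, 28))
w = init_weights(32, 1, 3, 3)
w2 = init_weights(64, 32, 3, 3)
w3 = init_weights(128, 64, 3, 3)
n_flat = 128 * 3 * 3  # placeholder: must equal channels*rows*cols of the flattened l3b for your input size
w4 = init_weights(n_flat, 625)
w_o2 = init_weights(625, 10)
py_x_conv = convnet_model(X_img, w, w2, w3, w4, w_o2, p_drop_conv=0.2, p_drop_hidden=0.5)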
def get_features_simple(nn_input, num_units=512, recurrent_layer=None):
    """Processes the raw input into features h_u. These features are then
    weighted at each step and used to form the context vector. The right
    choice is most likely problem specific, but you are welcome to try this
    default."""
    if recurrent_layer is None:
        recurrent_layer = temporalDenseLayer
    w_init = IIDUniform(-0.1, 0.1)
    activation = cgt.sigmoid
    l1_f = recurrent_layer(nn_input=nn_input, num_units=num_units,
                           activation=activation, w_init=w_init)
    l1_d = dropout(l1_f, 0.4)
    # l1_b = recurrent_layer(nn_input=nn_input, num_units=num_units,
    #                        backwards=True, activation=activation, w_init=w_init)
    # l1_plus = l1_f + l1_b
    return l1_d
def __init__(self, n_actions):
    Serializable.__init__(self, n_actions)
    cgt.set_precision('double')
    n_in = 128
    o_no = cgt.matrix("o_no", fixed_shape=(None, n_in))
    a_n = cgt.vector("a_n", dtype='i8')
    q_n = cgt.vector("q_n")
    oldpdist_np = cgt.matrix("oldpdists")

    nhid, nhid2 = 64, 64
    h0 = (o_no - 128.0) / 128.0
    d0 = nn.dropout(h0, .2)  # was dropout(h1, .2), but h1 is not defined at this point
    h1 = nn.rectify(nn.Affine(128, nhid, weight_init=nn.IIDGaussian(std=.1))(d0))
    d1 = nn.dropout(h1, .2)
    h2 = nn.rectify(nn.Affine(nhid, nhid2, weight_init=nn.IIDGaussian(std=.1))(d1))
    d2 = nn.dropout(h2, .2)  # uncommented: d2 is consumed by the softmax layer below
    probs_na = nn.softmax(nn.Affine(nhid2, n_actions, weight_init=nn.IIDGaussian(std=0.01))(d2))
    logprobs_na = cgt.log(probs_na)
    b = cgt.size(o_no, 0)
    logps_n = logprobs_na[cgt.arange(b), a_n]
    surr = (logps_n * q_n).mean()
    kl = (oldpdist_np * cgt.log(oldpdist_np / probs_na)).sum(axis=1).mean()

    params = nn.get_parameters(surr)
    gradsurr = cgt.grad(surr, params)
    flatgrad = cgt.concatenate([p.flatten() for p in gradsurr])

    lam = cgt.scalar()
    penobj = surr - lam * kl
    self._f_grad_lagrangian = cgt.function(
        [lam, oldpdist_np, o_no, a_n, q_n],
        cgt.concatenate([p.flatten() for p in cgt.grad(penobj, params)]))
    self.f_pdist = cgt.function([o_no], probs_na)
    self.f_probs = cgt.function([o_no], probs_na)
    self.f_surr_kl = cgt.function([oldpdist_np, o_no, a_n, q_n], [surr, kl])
    self.f_gradlogp = cgt.function([oldpdist_np, o_no, a_n, q_n], flatgrad)

    self.pc = ParamCollection(params)
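# Hypothetical call pattern for the compiled functions above. The class name
# and batch size are assumptions (only __init__ is shown); the argument order
# matches the cgt.function signatures in the constructor.
policy = AtariRAMPolicy(n_actions=6)
obs = np.random.randint(0, 256, size=(32, 128)).astype('float64')   # 128-byte RAM observations
probs = policy.f_probs(obs)                                          # (32, 6) action probabilities
acts = np.array([np.random.choice(6, p=p) for p in probs], dtype='int64')
advs = np.random.randn(32)
surr, kl = policy.f_surr_kl(probs, obs, acts, advs)    # surrogate objective and mean KL
flat_grad = policy.f_gradlogp(probs, obs, acts, advs)  # flattened policy-gradient vector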
def dropoutLayer(nn_input, p=0):
    return dropout(nn_input, p)
    b = name2node[bname] = cgt.shared(bval, name=bname, fixed_shape_mask="all")
    yname = layer.top[0]
    output = [cgt.broadcast("+", X.dot(W), b, "xx,1x")]
elif layer.type == "ReLU":
    output = [nn.rectify(inputs[0])]
elif layer.type == "Softmax":
    output = [nn.softmax(inputs[0])]
elif layer.type == "LRN":
    # XXX needs params
    param = layer.lrn_param
    output = [nn.lrn(inputs[0], param.alpha, param.beta, param.local_size)]
elif layer.type == "Concat":
    param = layer.concat_param
    output = [cgt.concatenate(inputs, param.concat_dim)]
elif layer.type == "Dropout":
    output = [nn.dropout(inputs[0])]
elif layer.type == "SoftmaxWithLoss":
    output = [nn.loglik_softmax(inputs[0], inputs[1])]
elif layer.type == "Accuracy":
    output = [nn.zero_one_loss(inputs[0], inputs[1])]
else:
    cgt.error("unrecognized layer type %s" % layer.type)

assert output is not None
# assert isinstance(output, cgt.Node)
for i in xrange(len(layer.top)):
    name2node[layer.top[i]] = output[i]
print "stored", layer.top[0]
if layer.type != "Data":
    print "shape", layer.type, infer_shape(name2node[layer.bottom[0]]), infer_shape(name2node[layer.top[0]])