def initSequence(self):
    self.t = 0
    self.x = {}
    self.h = {}
    self.c = {}
    self.ct = {}
    self.input_gate = {}
    self.forget_gate = {}
    self.output_gate = {}
    self.cell_update = {}
    if hasattr(self, 'previous'):
        # chained to a previous LSTM: start this sequence from its final state
        self.h[0] = self.previous.h[self.previous.t]
        self.c[0] = self.previous.c[self.previous.t]
    else:
        self.h[0] = zeros(self.hidden_size)
        self.c[0] = zeros(self.hidden_size)
    if hasattr(self, 'next'):
        # a chained next layer exists: its gradient wrt its initial state
        # seeds the gradient at this layer's final state
        self.dh_prev = self.next.dh_prev
        self.dc_prev = self.next.dc_prev
    else:
        self.dh_prev = zeros(self.hidden_size)
        self.dc_prev = zeros(self.hidden_size)
    # reset all gradients to zero
    for name, param, grad in self.params:
        grad[:] = 0
def fromJSON(self, json):
    self.out_depth = json['out_depth']
    self.out_sx = json['out_sx']
    self.out_sy = json['out_sy']
    self.layer_type = json['layer_type']
    self.group_size = json['group_size']
    self.switches = zeros(self.group_size)
def backward(self):
    # compute gradient wrt weights, biases and input data
    V = self.in_act
    V.dw = zeros(len(V.w))  # zero out gradient
    V_sx = V.sx
    V_sy = V.sy
    xy_stride = self.stride
    for d in xrange(self.out_depth):
        f = self.filters[d]
        x = -self.pad
        y = -self.pad
        for ay in xrange(self.out_sy):
            x = -self.pad
            for ax in xrange(self.out_sx):
                # convolve and add up the gradients
                chain_grad = self.out_act.get_grad(ax, ay, d)  # gradient from above, from chain rule
                for fy in xrange(f.sy):
                    off_y = y + fy
                    for fx in xrange(f.sx):
                        off_x = x + fx
                        if off_y >= 0 and off_y < V_sy and off_x >= 0 and off_x < V_sx:
                            # forward prop calculated: a += f.get(fx, fy, fd) * V.get(ox, oy, fd)
                            # f.add_grad(fx, fy, fd, V.get(off_x, off_y, fd) * chain_grad)
                            # V.add_grad(off_x, off_y, fd, f.get(fx, fy, fd) * chain_grad)
                            for fd in xrange(f.depth):
                                ix1 = ((V.sx * off_y) + off_x) * V.depth + fd
                                ix2 = ((f.sx * fy) + fx) * f.depth + fd
                                f.dw[ix2] += V.w[ix1] * chain_grad
                                V.dw[ix1] += f.w[ix2] * chain_grad
                self.biases.dw[d] += chain_grad
                x += xy_stride
            y += xy_stride
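# Sanity check of the flat-index arithmetic above (a sketch, not part of the
# layer): volumes are stored with depth fastest, then x, then y, so element
# (x, y, d) lives at ((sx * y) + x) * depth + d. Toy 4x4x3 volume:
import numpy as np

sx, sy, depth = 4, 4, 3
w = np.arange(sx * sy * depth)   # flat storage, like V.w

def flat_index(x, y, d):
    return ((sx * y) + x) * depth + d

assert w[flat_index(2, 1, 0)] == ((4 * 1) + 2) * 3 + 0  # == 18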
def __init__(self, opt={}):
    self.group_size = getopt(opt, 'group_size', 2)
    self.out_sx = opt['in_sx']
    self.out_sy = opt['in_sy']
    self.out_depth = opt['in_depth'] / self.group_size
    self.layer_type = 'maxout'
    self.switches = zeros(self.out_sx * self.out_sy * self.out_depth)
def activate(self, feature_maps):
    """
    feature_maps:  (batch_size, # of input feature maps, height, width)
    after conv:    (batch_size, # of filters, new h, new w)
    after pooling: (batch_size, # of filters, new h / poolsize, new w / poolsize)
    """
    W = self.W
    b = self.b
    pool_h, pool_w = self.poolsize
    n_output, n_input, f_height, f_width = self.filter_shape
    batch_size, _, height, width = feature_maps.shape
    n_height, n_width = (height - f_height + 1, width - f_width + 1)
    assert feature_maps.shape[1] == n_input
    # do convolve2d
    if FLAGS.theano_conv:
        after_filter = self.conv2d_activate_valid(feature_maps, rot90(W))
    else:
        after_filter = util.zeros((batch_size, n_output, n_height, n_width))
        for index in np.ndindex(batch_size, n_output):
            i, q = index
            result = after_filter[index]
            for p in xrange(n_input):
                result += conv_valid(feature_maps[i][p], np.rot90(W[q][p], 2))
    after_filter += b[np.newaxis, :, np.newaxis, np.newaxis]
    after_filter = self.activation(after_filter)
    # do pooling
    # borders are ignored
    self.M, ret = self.do_pooling(after_filter, self.poolsize)
    return ret
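# Shape bookkeeping from the docstring made concrete: a 'valid' convolution
# shrinks each spatial dim by (filter - 1), and pooling divides the rest.
# Example numbers are illustrative only: 28x28 inputs, 5x5 filters, 2x2 pooling.
batch_size, n_input, height, width = 16, 1, 28, 28
n_output, f_height, f_width = 6, 5, 5
pool_h, pool_w = 2, 2

n_height, n_width = height - f_height + 1, width - f_width + 1  # 24, 24
pooled = (n_height // pool_h, n_width // pool_w)                # (12, 12)
# (16, 1, 28, 28) -> conv -> (16, 6, 24, 24) -> pool -> (16, 6, 12, 12)
assert (n_height, n_width) == (24, 24) and pooled == (12, 12)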
def py_do_pooling(after_filter, poolsize):
    batch_size, n_output, n_height, n_width = after_filter.shape
    pool_h, pool_w = poolsize
    ret_h = int(float(n_height) / pool_h)
    ret_w = int(float(n_width) / pool_w)
    ret = util.zeros((batch_size, n_output, ret_h, ret_w))
    M = util.zeros((batch_size, n_output, n_height, n_width))
    ret.fill(-np.inf)
    for i, j, h, w in np.ndindex(ret.shape):
        ret[i][j][h][w], ind = max_argmax(after_filter[i][j],
                                          h * pool_h, (h + 1) * pool_h,
                                          w * pool_w, (w + 1) * pool_w)
        M[i][j][ind] = 1  # mark the argmax position for the backward pass
    return M, ret
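# A numpy-only check of the same pooling-with-mask idea on a toy 4x4 map.
# (max_argmax and util.zeros are helpers from this codebase, so the 2x2 case
# is re-implemented directly here; the names below are illustrative only.)
import numpy as np

x = np.array([[1., 3., 2., 0.],
              [4., 2., 1., 5.],
              [0., 1., 2., 1.],
              [3., 0., 1., 2.]])

out = np.zeros((2, 2))
mask = np.zeros_like(x)  # plays the role of M: 1 at each argmax
for h in range(2):
    for w in range(2):
        window = x[h*2:(h+1)*2, w*2:(w+1)*2]
        out[h, w] = window.max()
        r, c = np.unravel_index(window.argmax(), window.shape)
        mask[h*2 + r, w*2 + c] = 1

# out == [[4., 5.], [3., 2.]]; exactly one 1 per 2x2 block
assert out[0, 0] == 4. and mask.sum() == 4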
def error(self, _, input):
    """
    error_output: not used, because we use error_before_pooling calculated in grad()
    M, error_before_pooling: (batch_size, # of filters, h_after_filter, w_after_filter)
    input: (batch_size, # of input feature maps, height, width)
    return: (batch_size, # of input feature maps, height, width)
    """
    batch_size, _, height, width = input.shape
    n_output, n_input, _, _ = self.filter_shape
    assert input.shape[1] == n_input
    W = self.W
    grad_activation = self.grad_activation
    error_before_pooling = self.error_before_pooling
    if FLAGS.theano_conv:
        ret = self.conv2d_full(error_before_pooling, np.swapaxes(W, 0, 1))
    else:
        ret = util.zeros((batch_size, n_input, height, width))
        for i, j in np.ndindex(batch_size, n_input):
            result = ret[i][j]
            for k in xrange(n_output):
                result += conv_full(error_before_pooling[i][k], W[k][j])
    ret = np.multiply(ret, grad_activation(input))
    return ret
def grad(self, error_output, input):
    """
    input: (batch_size, # of input feature maps, height, width)
    error_output: (batch_size, # of filters, h_after_filter, w_after_filter)
    """
    pool_h, pool_w = self.poolsize
    error_before_pooling = np.copy(self.M)
    # upsample(error_before_pooling, error_output, pool_h, pool_w)
    ct_upsample(error_before_pooling, error_output, pool_h, pool_w)
    error_output = error_before_pooling
    self.error_before_pooling = error_before_pooling
    batch_size = input.shape[0]
    if FLAGS.theano_conv:
        # contract (p, i, :, :) with (q, i, :, :) -> (p, q, :, :)
        rot_error_output = rot90(error_output)
        W_grad = self.conv2d_grad_valid(np.swapaxes(input, 0, 1),
                                        np.swapaxes(rot_error_output, 0, 1))
        W_grad = np.swapaxes(W_grad, 0, 1)
    else:
        W_grad = util.zeros(self.W.shape)
        for i, q, p in np.ndindex(batch_size, *self.W.shape[:2]):
            # accumulate over the batch (the average is taken below)
            W_grad[q][p] += conv_valid(input[i][p], np.rot90(error_output[i][q], 2))
    b_grad = np.sum(error_output, axis=(0, 2, 3))
    self.W_grad = W_grad / input.shape[0]
    self.b_grad = b_grad / input.shape[0]
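# A sketch (single map, single filter, scipy only) checking the identity the
# W_grad computation relies on: dL/dW[a, b] = sum_{u,v} delta[u, v] * x[u+a, v+b],
# which is exactly conv_valid(input, rot180(delta)) as used above.
import numpy as np
from scipy.signal import convolve2d

rng = np.random.RandomState(0)
x = rng.randn(5, 5)       # one input feature map
delta = rng.randn(3, 3)   # upstream error, shaped like the valid-conv output

W_grad = convolve2d(x, np.rot90(delta, 2), mode='valid')  # 3x3, like the filter

a, b = 1, 2
manual = sum(delta[u, v] * x[u + a, v + b]
             for u in range(3) for v in range(3))
assert np.allclose(W_grad[a, b], manual)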
def backward(self):
    V = self.in_act
    V2 = self.out_act
    N = len(V.w)
    V.dw = zeros(N)  # zero out gradient wrt data
    for i in xrange(N):
        v2wi = V2.w[i]
        V.dw[i] = v2wi * (1.0 - v2wi) * V2.dw[i]  # sigmoid derivative: s * (1 - s)
def __init__(self, image_shape, filter_shape, poolsize, bound,
             activation, grad_activation):
    """
    Allocate a LeNetConvPoolLayer with shared variable internal parameters.

    image_shape: (batch_size, # of input feature_maps, height, width)

    :type filter_shape: tuple or list of length 4
    :param filter_shape: (number of output filters, num input feature maps,
                          filter height, filter width)

    :type poolsize: tuple or list of length 2
    :param poolsize: the downsampling (pooling) factor (#rows, #cols)
    """
    if not bound:
        fan_in = np.prod(filter_shape[1:])
        fan_out = ((filter_shape[0] * np.prod(filter_shape[2:])) /
                   (np.prod(poolsize)))
        bound = np.sqrt(6.0 / (fan_in + fan_out))
    self.W = util.normal(filter_shape, bound)
    self.b = util.zeros((filter_shape[0],))
    self.activation = activation
    self.grad_activation = grad_activation
    self.poolsize = poolsize
    self.filter_shape = filter_shape
    self.output_shape = (image_shape[0], filter_shape[0],
                         (image_shape[2] - filter_shape[2] + 1) / poolsize[0],
                         (image_shape[3] - filter_shape[3] + 1) / poolsize[1])
    if FLAGS.theano_conv:
        self._conv2d_activate_valid = conv2d_func(image_shape, filter_shape, 'valid')
        self._conv2d_grad_valid = conv2d_func(
            (image_shape[1], image_shape[0], image_shape[2], image_shape[3]),
            (filter_shape[0], image_shape[0],
             image_shape[2] - filter_shape[2] + 1,
             image_shape[3] - filter_shape[3] + 1),
            'valid')
        self._conv2d_full = conv2d_func(
            (image_shape[0], filter_shape[0],
             image_shape[2] - filter_shape[2] + 1,
             image_shape[3] - filter_shape[3] + 1),
            (filter_shape[1], filter_shape[0], filter_shape[2], filter_shape[3]),
            'full')
    self.params = ['W', 'b']
    self.W_inc_before = util.zeros(self.W.shape)
    self.b_inc_before = util.zeros(self.b.shape)
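# The fallback bound is the Glorot-style heuristic sqrt(6 / (fan_in + fan_out)),
# with fan_out shrunk by the pooling factor. Worked with LeNet-ish shapes
# (20 filters of 5x5 over 1 input map, 2x2 pooling; example numbers only):
import numpy as np

filter_shape = (20, 1, 5, 5)   # (n_output, n_input, f_height, f_width)
poolsize = (2, 2)

fan_in = np.prod(filter_shape[1:])                                         # 25
fan_out = filter_shape[0] * np.prod(filter_shape[2:]) / np.prod(poolsize)  # 125
bound = np.sqrt(6.0 / (fan_in + fan_out))                                  # sqrt(6/150)
assert np.isclose(bound, 0.2)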
def backward(self):
    V = self.in_act
    V2 = self.out_act
    N = len(V.w)
    V.dw = zeros(N)  # zero out gradient wrt data
    for i in xrange(N):
        if V2.w[i] <= 0:  # threshold
            V.dw[i] = 0
        else:
            V.dw[i] = V2.dw[i]
def __init__(self, opt={}):
    self.sx = opt['sx']  # filter size
    self.in_depth = opt['in_depth']
    self.in_sx = opt['in_sx']
    self.in_sy = opt['in_sy']

    # optional
    self.sy = getopt(opt, 'sy', self.sx)
    self.stride = getopt(opt, 'stride', 2)
    self.pad = getopt(opt, 'pad', 0)  # padding to borders of input volume

    self.out_depth = self.in_depth
    self.out_sx = int(floor((self.in_sx - self.sx + 2 * self.pad) / self.stride + 1))
    self.out_sy = int(floor((self.in_sy - self.sy + 2 * self.pad) / self.stride + 1))
    self.layer_type = 'pool'

    # Store switches for x,y coordinates for where the max comes from, for each output neuron
    switch_size = self.out_sx * self.out_sy * self.out_depth
    self.switch_x = zeros(switch_size)
    self.switch_y = zeros(switch_size)
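# The output-size formula is the usual floor((in - filter + 2*pad) / stride + 1).
# For example, 2x2 pooling with stride 2 and no padding halves a 32x32 input:
from math import floor

in_sx, sx, pad, stride = 32, 2, 0, 2
out_sx = int(floor((in_sx - sx + 2 * pad) / stride + 1))  # floor(30/2 + 1)
assert out_sx == 16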
def backward(self):
    # same as relu
    V = self.in_act
    V2 = self.out_act
    N = len(V.w)
    V.dw = zeros(N)  # zero out gradient wrt data
    for i in xrange(N):
        if V2.w[i] <= 0:  # threshold
            V.dw[i] = 0
        else:
            V.dw[i] = V2.dw[i]
def ct_do_pooling(after_filter, poolsize):
    batch_size, n_output, n_height, n_width = after_filter.shape
    pool_h, pool_w = poolsize
    ret_h = int(float(n_height) / pool_h)
    ret_w = int(float(n_width) / pool_w)
    M = np.ascontiguousarray(util.zeros((batch_size, n_output, n_height, n_width)))
    ret = np.ascontiguousarray(util.zeros((batch_size, n_output, ret_h, ret_w)))
    assert after_filter.dtype == M.dtype
    _DLL.do_pooling(ct.c_int(after_filter.itemsize),
                    ct.c_int(batch_size), ct.c_int(n_output),
                    ct.c_int(n_height), ct.c_int(n_width),
                    ct.c_int(pool_h), ct.c_int(pool_w),
                    ct.c_int(ret_h), ct.c_int(ret_w),
                    after_filter.ctypes.data_as(ct.c_void_p),
                    ret.ctypes.data_as(ct.c_void_p),
                    M.ctypes.data_as(ct.c_void_p))
    return M, ret
def backward(self):
    V = self.in_act
    V.dw = zeros(len(V.w))  # zero out gradient
    # compute gradient wrt weights and data
    for i in xrange(self.out_depth):
        fi = self.filters[i]
        chain_grad = self.out_act.dw[i]
        for d in xrange(self.num_inputs):
            V.dw[d] += fi.w[d] * chain_grad   # grad wrt input data
            fi.dw[d] += V.w[d] * chain_grad   # grad wrt params
        self.biases.dw[i] += chain_grad
def __init__(self, inp, n_labels, n_hidden_previous, update_fn,
             training=None, keep_prob=None):
    if type(inp) == list:
        self.input = T.concatenate(inp)
        input_size = len(inp) * n_hidden_previous
    else:
        self.input = inp
        input_size = n_hidden_previous
    if training is not None:
        assert keep_prob is not None
        self.input = dropout(self.input, training, keep_prob)
    self.update_fn = update_fn
    # input -> hidden (sized somewhere between size of input & softmax)
    n_hidden = int(math.sqrt(input_size * n_labels))
    print "concat sizing %s -> %s -> %s" % (input_size, n_hidden, n_labels)
    self.Wih = util.sharedMatrix(input_size, n_hidden, 'Wih')
    self.bh = util.shared(util.zeros((1, n_hidden)), 'bh')
    # hidden -> softmax
    self.Whs = util.sharedMatrix(n_hidden, n_labels, 'Whs')
    self.bs = util.shared(util.zeros((1, n_labels)), 'bs')
def backward(self, y):
    # compute and accumulate gradient wrt weights and bias of this layer
    x = self.in_act
    x.dw = zeros(len(x.w))
    for i in xrange(self.out_depth):
        indicator = float(i == y)
        mul = -(indicator - self.es[i])
        x.dw[i] = mul
    # loss is the class negative log likelihood
    try:
        return -log(self.es[y])
    except ValueError:
        # es[y] underflowed to 0; clamp the loss instead of raising
        return -log(0.001)
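# The loop above implements dL/dx_i = p_i - 1{i == y}, the softmax + negative
# log likelihood gradient. A small numeric check (es stands in for self.es):
import numpy as np

scores = np.array([1.0, 2.0, 0.5])
es = np.exp(scores) / np.exp(scores).sum()   # softmax probabilities
y = 1                                        # true class

dw = es.copy()
dw[y] -= 1.0                                 # p_i - 1{i == y}, as in the loop
loss = -np.log(es[y])

assert np.isclose(dw.sum(), 0.0)             # gradient entries sum to zero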
def backward(self, y):
    # compute and accumulate gradient wrt weights and bias of this layer
    x = self.in_act
    x.dw = zeros(len(x.w))  # zero out the gradient of input Vol
    yscore = x.w[y]
    margin = 1.0
    loss = 0.0
    for i in xrange(self.out_depth):
        if i == y:
            continue  # the true class carries no margin term against itself
        if -yscore + x.w[i] + margin > 0:
            # Hinge loss: http://en.wikipedia.org/wiki/Hinge_loss
            x.dw[i] += 1
            x.dw[y] -= 1
            loss += -yscore + x.w[i] + margin
    return loss
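# Worked instance of the multiclass hinge loss above: scores [2.0, 1.5, -0.3],
# true class 0, margin 1. Only class 1 violates the margin (1.5 - 2.0 + 1 = 0.5).
scores = [2.0, 1.5, -0.3]
y, margin = 0, 1.0

loss = sum(max(0.0, scores[i] - scores[y] + margin)
           for i in range(len(scores)) if i != y)
assert loss == 0.5  # class 2 contributes nothing: -0.3 - 2.0 + 1 < 0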
def backward(self):
    # pooling layers have no parameters, so simply compute
    # gradient wrt data here
    V = self.in_act
    V.dw = zeros(len(V.w))  # zero out gradient wrt data
    A = self.out_act  # computed in forward pass
    n = 0
    for d in xrange(self.out_depth):
        x = -self.pad
        y = -self.pad
        for ax in xrange(self.out_sx):
            y = -self.pad
            for ay in xrange(self.out_sy):
                chain_grad = self.out_act.get_grad(ax, ay, d)
                V.add_grad(self.switch_x[n], self.switch_y[n], d, chain_grad)
                n += 1
                y += self.stride
            x += self.stride
def backward(self):
    V = self.in_act
    V2 = self.out_act
    N = self.out_depth
    V.dw = zeros(len(V.w))  # zero out gradient wrt data
    # pass the gradient through the appropriate switch
    if self.sx == 1 and self.sy == 1:
        for i in range(N):
            chain_grad = V2.dw[i]
            V.dw[self.switches[i]] = chain_grad
    else:
        switch_counter = 0
        for x in xrange(V2.sx):
            for y in xrange(V2.sy):
                for i in xrange(N):
                    chain_grad = V2.get_grad(x, y, i)
                    # index with switch_counter (the original `n` was undefined)
                    V.set_grad(x, y, self.switches[switch_counter], chain_grad)
                    switch_counter += 1
def __init__(self, name, input_dim, hidden_dim, opts, update_fn, h0,
             inputs, context=None, context_dim=None):
    self.name_ = name
    self.update_fn = update_fn
    self.h0 = h0
    self.inputs = inputs    # input sequence
    self.context = context  # additional context to add at each timestep of input
    # hidden -> hidden
    self.Uh = util.sharedMatrix(hidden_dim, hidden_dim, 'Uh', orthogonal_init=True)
    # embedded input -> hidden
    self.Wh = util.sharedMatrix(hidden_dim, input_dim, 'Wh', orthogonal_init=True)
    # context -> hidden (if applicable)
    if self.context:
        self.Whc = util.sharedMatrix(hidden_dim, context_dim, 'Wch', orthogonal_init=True)
    # bias
    self.bh = util.shared(util.zeros((hidden_dim,)), 'bh')
def backward(self, y):
    # y is a list here of size num_inputs
    # compute and accumulate gradient wrt weights and bias of this layer
    x = self.in_act
    x.dw = zeros(len(x.w))  # zero out the gradient of input Vol
    loss = 0.0
    if type(y) == list:
        for i in xrange(self.out_depth):
            dy = x.w[i] - y[i]
            x.dw[i] = dy
            loss += 0.5 * dy * dy  # L2 loss; the 0.5 factor matches the gradient dy above
    else:
        # assume it is a dict with entries dim and val, and we pass gradient
        # only along dimension dim to be equal to val
        i = y['dim']
        y_i = y['val']
        dy = x.w[i] - y_i
        x.dw[i] = dy
        loss += 0.5 * dy * dy
    return loss
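# Tracing both target forms with toy numbers (pure arithmetic, no layer
# objects; values are illustrative only):
x_w = [0.5, -0.2]                        # pretend output activations

# list target: every output dimension contributes
y = [1.0, 0.0]
dw = [x_w[i] - y[i] for i in range(2)]   # [-0.5, -0.2]
loss = sum(0.5 * d * d for d in dw)      # 0.125 + 0.02 = 0.145

# dict target: gradient flows only along one dimension
y = {'dim': 1, 'val': 0.3}
d = x_w[y['dim']] - y['val']             # -0.5; only x.dw[1] would be set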
def backward(self):
    # evaluate gradient wrt data
    V = self.in_act
    V.dw = zeros(len(V.w))  # zero out gradient wrt data
    A = self.out_act
    n2 = self.n / 2
    for x in xrange(V.sx):
        for y in xrange(V.sy):
            for i in xrange(V.depth):
                chain_grad = self.out_act.get_grad(x, y, i)
                S = self.S_cache.get(x, y, i)
                S_b = S ** self.beta
                S_b2 = S_b * S_b
                # Normalize in a window of size n
                for j in xrange(max(0, i - n2), min(i + n2, V.depth - 1) + 1):
                    a_j = V.get(x, y, j)
                    grad = -(a_j ** 2) * self.beta * (S ** (self.beta - 1)) \
                        * self.alpha / self.n * 2.0
                    if j == i:
                        grad += S_b
                    grad /= S_b2
                    grad *= chain_grad
                    V.add_grad(x, y, j, grad)
def __init_state(self):
    """Initialisation: random assignments with equal probabilities
    """
    ## initialise count variables.
    # number of times word i is assigned to topic j
    self.nw = zeros(self.V, self.K)
    # number of words in document i assigned to topic j.
    self.nd = zeros(self.M, self.K)
    # total number of words assigned to topic j.
    self.nwsum = [0] * self.K
    # total number of words in document i
    self.ndsum = [0] * self.M

    ## The z_i are initialised to values in [0, K) to determine
    ## the initial state of the Markov chain.
    # topic assignments for each word.
    self.z = []
    for m in range(self.M):
        N = len(self.documents[m])
        self.z.append([0] * N)
        for n in range(N):
            topic = int(random.random() * self.K)
            self.z[m][n] = topic
            self.nw[self.documents[m][n]][topic] += 1
            self.nd[m][topic] += 1
            self.nwsum[topic] += 1
        self.ndsum[m] = N

    if self.SAMPLE_LAG > 0:
        # cumulative statistics of theta
        self.thetasum = zeros(self.M, self.K)
        self.theta = zeros(self.M, self.K)
        # cumulative statistics of phi
        self.phisum = zeros(self.K, self.V)
        self.phi = zeros(self.K, self.V)
        # size of statistics
        self.numstats = 0

    self.__print_init_state()
def __init__(self, input_size, hidden_size, init_range=1.0, previous=None):
    self.input_size, self.hidden_size = input_size, hidden_size
    if previous:
        self.previous = previous
        previous.next = self

    # initialize weights
    def init(x, y):
        return initalize((x, y), init_range)

    h, n = hidden_size, input_size
    self.W_hi, self.W_hf, self.W_ho, self.W_hj = init(h, h), init(h, h), init(h, h), init(h, h)
    self.W_xi, self.W_xf, self.W_xo, self.W_xj = init(h, n), init(h, n), init(h, n), init(h, n)
    # forget-gate bias starts high; the other biases start at zero
    self.b_i, self.b_f, self.b_o, self.b_j = zeros(h), ones(h) * 3, zeros(h), zeros(h)

    # initialize gradients
    self.dW_hi, self.dW_hf, self.dW_ho, self.dW_hj = zeros(h, h), zeros(h, h), zeros(h, h), zeros(h, h)
    self.dW_xi, self.dW_xf, self.dW_xo, self.dW_xj = zeros(h, n), zeros(h, n), zeros(h, n), zeros(h, n)
    self.db_i, self.db_f, self.db_o, self.db_j = zeros(h), zeros(h), zeros(h), zeros(h)

    # list of all parameters
    self.params = [
        ('W_hi', self.W_hi, self.dW_hi),
        ('W_hf', self.W_hf, self.dW_hf),
        ('W_ho', self.W_ho, self.dW_ho),
        ('W_hj', self.W_hj, self.dW_hj),
        ('W_xi', self.W_xi, self.dW_xi),
        ('W_xf', self.W_xf, self.dW_xf),
        ('W_xo', self.W_xo, self.dW_xo),
        ('W_xj', self.W_xj, self.dW_xj),
        ('b_i', self.b_i, self.db_i),
        ('b_f', self.b_f, self.db_f),
        ('b_o', self.b_o, self.db_o),
        ('b_j', self.b_j, self.db_j),
    ]

    self.initSequence()
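# Why b_f starts at ones(h) * 3: assuming the standard sigmoid forget gate
# (the gate activations are not shown in this snippet), sigmoid(3) ~ 0.95, so
# the cell state is almost fully carried over at initialization, a common
# trick that helps LSTMs pick up long-range dependencies. A quick check:
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

print(sigmoid(3.0))  # ~0.953: the forget gate starts nearly open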
frames = 0
fps = 30
timea = time.time()
bytes = 0
bytesPerSec = 0
while True:
    buffer = b"V"
    frames += 1
    ret, raw_frame = capture.read()  # Read camera
    frame2 = cv2.cvtColor(raw_frame, cv2.COLOR_BGR2GRAY)
    # gray = Filter(frame2, SHARPEN)
    # gray = floyd_steinberg(frame2)
    gray = Dither(frame2, zeros(frame2.shape), 125)
    # frameBytes = np.packbits(gray // 255)  # Convert it to bytes
    buffer += struct.pack("HH", gray.shape[0], gray.shape[1])  # Pack frame size into packet
    buffer += gray.tobytes()
    compressed = bz2.compress(buffer, 9)
    PublishSocket.send(compressed)  # Send packet
    bytes += len(compressed)
    if time.time() - timea > 1:
        timea = time.time()
        fps = frames
        frames = 0
        bytesPerSec += bytes / 1024 / 1024
        bytesPerSec = bytesPerSec / 2
        bytes = 0
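# Receiver-side sketch for the packet built above: one tag byte b"V", two
# uint16s (rows, cols) from struct.pack("HH", ...), then the raw frame bytes,
# all bz2-compressed. Assumes the dithered frame is uint8 and that recv_bytes
# is whatever the subscribing socket delivers; both are assumptions, as the
# source shows only the sender.
import bz2
import struct
import numpy as np

def decode_frame(recv_bytes):
    # reverse of the sender: decompress, check the tag, read the size header
    payload = bz2.decompress(recv_bytes)
    assert payload[:1] == b"V"
    rows, cols = struct.unpack("HH", payload[1:5])
    return np.frombuffer(payload[5:], dtype=np.uint8).reshape(rows, cols)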