def transduce(self, src: ExpressionSequence) -> ExpressionSequence: src = src.as_tensor() src_height = src.dim()[0][0] src_width = src.dim()[0][1] # src_channels = 1 batch_size = src.dim()[1] # convolution and pooling layers # src dim is ((40, 1000), 128) src = padding(src, self.filter_width[0]+3) l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filters1), stride = [self.stride[0], self.stride[0]], is_valid = True)) # ((1, 1000, 64), 128) pool1 = dy.maxpooling2d(l1, (1, 4), (1,2), is_valid = True) #((1, 499, 64), 128) pool1 = padding(pool1, self.filter_width[1]+3) l2 = dy.rectify(dy.conv2d(pool1, dy.parameter(self.filters2), stride = [self.stride[1], self.stride[1]], is_valid = True))# ((1, 499, 512), 128) pool2 = dy.maxpooling2d(l2, (1, 4), (1,2), is_valid = True)#((1, 248, 512), 128) pool2 = padding(pool2, self.filter_width[2]) l3 = dy.rectify(dy.conv2d(pool2, dy.parameter(self.filters3), stride = [self.stride[2], self.stride[2]], is_valid = True))# ((1, 248, 1024), 128) pool3 = dy.max_dim(l3, d = 1) my_norm = dy.l2_norm(pool3) + 1e-6 output = dy.cdiv(pool3,my_norm) output = dy.reshape(output, (self.num_filters[2],), batch_size = batch_size) return ExpressionSequence(expr_tensor=output)
def transduce(self, src: ExpressionSequence) -> ExpressionSequence: src = src.as_tensor() src_height = src.dim()[0][0] src_width = 1 batch_size = src.dim()[1] W = dy.parameter(self.pW) b = dy.parameter(self.pb) src = dy.reshape(src, (src_height, src_width), batch_size=batch_size) # ((276, 80, 3), 1) # convolution and pooling layers l1 = (W*src)+b output = dy.cdiv(l1,dy.sqrt(dy.squared_norm(l1))) return ExpressionSequence(expr_tensor=output)
def transduce(self, src: ExpressionSequence) -> ExpressionSequence: src = src.as_tensor() # convolutional layer src = padding(src, src.dim()[0][0], src.dim()[0][1], self.filter_width, self.stride, src.dim()[1]) l1 = dy.rectify(dy.conv2d(src, dy.parameter(self.filter_conv), stride = [self.stride, self.stride], is_valid = True)) timestep = l1.dim()[0][1] features = l1.dim()[0][2] batch_size = l1.dim()[1] # transpose l1 to be (timesetp, dim), but keep the batch_size. rhn_in = dy.reshape(l1, (timestep, features), batch_size = batch_size) rhn_in = [dy.pick(rhn_in, i) for i in range(timestep)] for l in range(self.rhn_num_hidden_layers): rhn_out = [] # initialize a random vector for the first state vector, keep the same batch size. prev_state = dy.parameter(self.init[l]) # begin recurrent high way network for t in range(timestep): for m in range(0, self.rhn_microsteps): H = dy.affine_transform([dy.parameter(self.recur[l][m][1]), dy.parameter(self.recur[l][m][0]), prev_state]) T = dy.affine_transform([dy.parameter(self.recur[l][m][3]), dy.parameter(self.recur[l][m][2]), prev_state]) if m == 0: H += dy.parameter(self.linear[l][0]) * rhn_in[t] T += dy.parameter(self.linear[l][1]) * rhn_in[t] H = dy.tanh(H) T = dy.logistic(T) prev_state = dy.cmult(1 - T, prev_state) + dy.cmult(T, H) # ((1024, ), batch_size) rhn_out.append(prev_state) if self.residual and l>0: rhn_out = [sum(x) for x in zip(rhn_out, rhn_in)] rhn_in = rhn_out # Compute the attention-weighted average of the activations rhn_in = dy.concatenate_cols(rhn_in) scores = dy.transpose(dy.parameter(self.attention[0][1]))*dy.tanh(dy.parameter(self.attention[0][0])*rhn_in) # ((1,510), batch_size) scores = dy.reshape(scores, (scores.dim()[0][1],), batch_size = scores.dim()[1]) attn_out = rhn_in*dy.softmax(scores) # # rhn_in.as_tensor() is ((1024,510), batch_size) softmax is ((510,), batch_size) return ExpressionSequence(expr_tensor = attn_out)