def ready(self, args, train):
    # len * batch
    self.idxs = T.imatrix()
    self.idys = T.imatrix()
    self.init_state = T.matrix(dtype=theano.config.floatX)

    dropout_prob = np.float64(args["dropout"]).astype(theano.config.floatX)
    self.dropout = theano.shared(dropout_prob)

    self.n_d = args["hidden_dim"]

    embedding_layer = EmbeddingLayer(
            n_d=self.n_d,
            vocab=set(w for w in train)
        )
    self.n_V = embedding_layer.n_V

    say("Vocab size: {}\tHidden dim: {}\n".format(self.n_V, self.n_d))

    activation = get_activation_by_name(args["activation"])

    rnn_layer = LSTM(
            n_in=self.n_d,
            n_out=self.n_d,
            activation=activation
        )

    output_layer = Layer(
            n_in=self.n_d,
            n_out=self.n_V,
            activation=T.nnet.softmax,
        )

    # (len*batch) * n_d
    x_flat = embedding_layer.forward(self.idxs.ravel())

    # len * batch * n_d
    x = apply_dropout(x_flat, self.dropout)
    x = x.reshape((self.idxs.shape[0], self.idxs.shape[1], self.n_d))

    # len * batch * (n_d+n_d): forward_all with return_c=True yields the
    # concatenated [cell state, hidden state]; keep the hidden half below
    h = rnn_layer.forward_all(x, self.init_state, return_c=True)
    self.last_state = h[-1]
    h = h[:, :, self.n_d:]
    h = apply_dropout(h, self.dropout)

    self.p_y_given_x = output_layer.forward(h.reshape(x_flat.shape))

    # per-token negative log-likelihood of the gold next words
    idys = self.idys.ravel()
    self.nll = -T.log(self.p_y_given_x[T.arange(idys.shape[0]), idys])

    self.layers = [embedding_layer, rnn_layer, output_layer]
    self.params = embedding_layer.params + rnn_layer.params + output_layer.params
    self.num_params = sum(len(x.get_value(borrow=True).ravel())
                          for l in self.layers for x in l.params)
    say("# of params in total: {}\n".format(self.num_params))
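# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of how the language-model graph built by ready() above
# might be compiled into a training function. Plain SGD, the learning rate
# `lr`, and the helper name `compile_lm_train_fn` are assumptions for
# illustration; the repository's actual optimizer may differ.
import theano
import theano.tensor as T

def compile_lm_train_fn(model, lr=0.1):
    cost = T.mean(model.nll)                    # average NLL per token
    updates = [(p, p - lr * T.grad(cost, p))    # vanilla SGD step
               for p in model.params]
    return theano.function(
        inputs=[model.idxs, model.idys, model.init_state],
        outputs=[cost, model.last_state],       # carry state across batches
        updates=updates,
    )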
def ready(self):
    args = self.args
    embedding_layer = self.embedding_layer
    self.n_hidden = args.hidden_dim
    self.n_in = embedding_layer.n_d
    dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX)
        )

    # x is length * batch_size
    # y is batch_size
    self.x = T.imatrix('x')
    self.y = T.ivector('y')

    x = self.x
    y = self.y
    n_hidden = self.n_hidden
    n_in = self.n_in

    # fetch word embeddings
    # (len * batch_size) * n_in
    slices = embedding_layer.forward(x.ravel())
    self.slices = slices

    # 3-d tensor, len * batch_size * n_in
    slices = slices.reshape((x.shape[0], x.shape[1], n_in))

    # stacking the feature extraction layers
    pooling = args.pooling
    depth = args.depth
    layers = self.layers = []
    prev_output = slices
    prev_output = apply_dropout(prev_output, dropout, v2=True)
    size = 0
    softmax_inputs = []
    activation = get_activation_by_name(args.act)
    for i in range(depth):
        if args.layer.lower() == "lstm":
            layer = LSTM(n_in=n_hidden if i > 0 else n_in,
                         n_out=n_hidden)
        elif args.layer.lower() == "strcnn":
            layer = StrCNN(n_in=n_hidden if i > 0 else n_in,
                           n_out=n_hidden,
                           activation=activation,
                           decay=args.decay,
                           order=args.order)
        elif args.layer.lower() == "rcnn":
            layer = RCNN(n_in=n_hidden if i > 0 else n_in,
                         n_out=n_hidden,
                         activation=activation,
                         order=args.order,
                         mode=args.mode)
        else:
            raise Exception("unknown layer type: {}".format(args.layer))
        layers.append(layer)
        prev_output = layer.forward_all(prev_output)
        if pooling:
            # sum over the time (length) dimension; divided by the
            # sequence length below to give mean pooling
            softmax_inputs.append(T.sum(prev_output, axis=0))
        else:
            softmax_inputs.append(prev_output[-1])
        prev_output = apply_dropout(prev_output, dropout)
        size += n_hidden

    # final feature representation is the concatenation of all extraction layers
    if pooling:
        softmax_input = T.concatenate(softmax_inputs, axis=1) / x.shape[0]
    else:
        softmax_input = T.concatenate(softmax_inputs, axis=1)
    softmax_input = apply_dropout(softmax_input, dropout, v2=True)

    # feed the feature repr. to the softmax output layer
    layers.append(Layer(n_in=size,
                        n_out=self.nclasses,
                        activation=softmax,
                        has_bias=False))

    for i, l in enumerate(layers):
        say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

    # predicted distribution of y given x
    self.p_y_given_x = layers[-1].forward(softmax_input)
    self.pred = T.argmax(self.p_y_given_x, axis=1)

    self.nll_loss = T.mean(
            T.nnet.categorical_crossentropy(self.p_y_given_x, y)
        )

    # adding regularizations
    self.l2_sqr = None
    self.params = []
    for layer in layers:
        self.params += layer.params
    for p in self.params:
        if self.l2_sqr is None:
            self.l2_sqr = args.l2_reg * T.sum(p**2)
        else:
            self.l2_sqr += args.l2_reg * T.sum(p**2)

    nparams = sum(len(x.get_value(borrow=True).ravel())
                  for x in self.params)
    say("total # parameters: {}\n".format(nparams))
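# --- Illustrative shape check (not part of the original file) ---
# A NumPy sketch of the pooled feature construction above: each stacked
# layer contributes the sum of its hidden states over time, the sums are
# concatenated, and dividing by the sequence length turns the result into
# a per-layer mean over time. All sizes below are made up for the example.
import numpy as np

length, batch, n_hidden, depth = 5, 2, 4, 3
outputs = [np.random.randn(length, batch, n_hidden) for _ in range(depth)]

pooled = np.concatenate([o.sum(axis=0) for o in outputs], axis=1) / length
assert pooled.shape == (batch, depth * n_hidden)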
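# --- Illustrative usage (not part of the original file) ---
# A minimal sketch of compiling the classifier graph above into train and
# predict functions. Plain SGD and the helper name `compile_classifier_fns`
# are assumptions; the repository may use a different update rule.
import theano
import theano.tensor as T

def compile_classifier_fns(model, lr=0.01):
    cost = model.nll_loss + model.l2_sqr        # data term + L2 penalty
    updates = [(p, p - lr * T.grad(cost, p)) for p in model.params]
    train_fn = theano.function([model.x, model.y], cost, updates=updates)
    predict_fn = theano.function([model.x], model.pred)
    return train_fn, predict_fn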
def ready(self):
    args = self.args
    embedding_layer = self.embedding_layer
    user_embedding_layer = self.user_embedding_layer
    self.n_hidden = args.hidden_dim
    self.n_in = embedding_layer.n_d
    dropout = self.dropout = theano.shared(
            np.float64(args.dropout_rate).astype(theano.config.floatX)
        )

    # x is length * batch_size
    # y is batch_size
    self.x = T.imatrix('x')
    self.w_masks = T.fmatrix('mask')
    self.w_lens = T.fvector('lens')
    self.s_ml = T.iscalar('sent_maxlen')
    self.s_num = T.iscalar('sent_num')
    self.y = T.ivector('y')
    self.usr = T.ivector('users')

    x = self.x
    y = self.y
    usr = self.usr
    w_masks = self.w_masks
    w_lens = self.w_lens
    s_ml = self.s_ml
    s_num = self.s_num
    n_hidden = self.n_hidden
    n_emb = n_in = self.n_in

    layers = self.layers = []

    slicesu = user_embedding_layer.forward(usr)
    slices = embedding_layer.forward(x.ravel())
    self.slices = slices  # important for updating word embeddings

    # 3-d tensor, len * batch_size * n_in
    slices = slices.reshape((x.shape[0], x.shape[1], n_in))

    pooling = args.pooling
    prev_output = slices
    prev_output = apply_dropout(prev_output, dropout, v2=True)
    size = 0

    n_hidden_t = n_hidden
    if args.direction == "bi":
        n_hidden_t = 2 * n_hidden

    softmax_inputs = []
    activation = get_activation_by_name(args.act)

    # word-level feature extraction layer
    if args.layer.lower() == "lstm":
        layer = LSTM(n_in=n_in,
                     n_out=n_hidden_t,
                     direction=args.direction)
    elif args.layer.lower() == "cnn":
        layer = CNN(n_in=n_in,
                    n_out=n_hidden_t,
                    activation=activation,
                    order=args.order)
    else:
        raise Exception("unknown layer type: {}".format(args.layer))

    layers.append(layer)
    prev_output = layer.forward_all(prev_output, masks=w_masks)
    prev_output = apply_dropout(prev_output, dropout)

    # sentence representation: user attention over words, or pooling
    if args.user_atten:
        layer = IterAttentionLayer(n_in=n_emb, n_out=n_hidden_t)
        layers.append(layer)
        if args.user_atten_base:
            slicesu = None
        softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=True, masks=w_masks)
    else:
        if pooling:
            # mean over words, using the true sentence lengths
            softmax_input = T.sum(prev_output, axis=0) / w_lens.dimshuffle(0, 'x')
        else:
            # last valid state of each sentence
            ind = T.cast(w_lens - T.ones_like(w_lens), 'int32')
            softmax_input = prev_output[T.arange(ind.shape[0]), ind]
    softmax_input = apply_dropout(softmax_input, dropout, v2=True)

    n_in = n_hidden_t
    size = 0
    softmax_inputs = []

    # regroup the sentence vectors into documents:
    # (num_docs * s_num) * emb  ->  s_num * num_docs * emb
    [sentlen, emblen] = T.shape(softmax_input)
    prev_output = softmax_input.reshape(
            (sentlen // s_num, s_num, emblen)).dimshuffle(1, 0, 2)

    # sentence-level feature extraction layer
    if args.layer.lower() == "lstm":
        layer = LSTM(n_in=n_in,
                     n_out=n_hidden_t,
                     direction=args.direction)
    elif args.layer.lower() == "cnn":
        layer = CNN(n_in=n_in,
                    n_out=n_hidden_t,
                    activation=activation,
                    order=args.order)
    else:
        raise Exception("unknown layer type: {}".format(args.layer))

    layers.append(layer)
    prev_output = layer.forward_all(prev_output)
    prev_output = apply_dropout(prev_output, dropout)

    # document representation: user attention over sentences, or pooling
    if args.user_atten:
        layer = IterAttentionLayer(n_in=n_emb, n_out=n_hidden_t)
        layers.append(layer)
        if args.user_atten_base:
            slicesu = None
        softmax_input = layers[-1].multi_hop_forward(
                prev_output, user_embs=slicesu, isWord=False)
    else:
        if pooling:
            softmax_input = T.sum(prev_output, axis=0) / T.cast(s_num, 'float32')
        else:
            softmax_input = prev_output[-1]
    softmax_input = apply_dropout(softmax_input, dropout, v2=True)

    size = n_hidden_t
    layers.append(Layer(n_in=size,
                        n_out=self.nclasses,
                        activation=softmax,
                        has_bias=False))

    if not args.fix_emb:
        for i, l in enumerate(layers):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))
    else:
        for i, l in enumerate(layers[1:]):
            say("layer {}: n_in={}\tn_out={}\n".format(i, l.n_in, l.n_out))

    # predicted distribution of y given x
    self.p_y_given_x = layers[-1].forward(softmax_input)
    self.pred = T.argmax(self.p_y_given_x, axis=1)

    self.nll_loss = T.mean(
            T.nnet.categorical_crossentropy(self.p_y_given_x, y)
        )

    # adding regularizations
    self.l2_sqr = None
    self.params = []
    for layer in layers:
        self.params += layer.params
    for p in self.params:
        if self.l2_sqr is None:
            self.l2_sqr = args.l2_reg * T.sum(p**2)
        else:
            self.l2_sqr += args.l2_reg * T.sum(p**2)

    nparams = sum(len(x.get_value(borrow=True).ravel())
                  for x in self.params)
    say("total # parameters: {}\n".format(nparams))