def __init__(self, output_size, layers, preprocess_name="identity",
             preprocess_options=None, scale=1.0, initializer=None,
             name="deep_lstm"):
    """Creates an instance of `StandardDeepLSTM`.

    Args:
        output_size: Output size of the final linear layer.
        layers: Output sizes of LSTM layers.
        preprocess_name: Gradient preprocessing class name (in `l2l.preprocess`
            or tf modules). Default is `tf.identity`.
        preprocess_options: Gradient preprocessing options.
        scale: Gradient scaling (default is 1.0).
        initializer: Variable initializer for linear layer. See `nn.Linear` and
            `nn.LSTM` docs for more info. This parameter can be a string (e.g.
            "zeros" will be converted to tf.zeros_initializer).
        name: Module name.
    """
    super(StandardDeepLSTM, self).__init__(name)

    self._output_size = output_size
    self._scale = scale

    if hasattr(preprocess, preprocess_name):
        # Preprocessing is a class in `l2l.preprocess`; guard against
        # `preprocess_options` being left as None.
        preprocess_class = getattr(preprocess, preprocess_name)
        self._preprocess = preprocess_class(**(preprocess_options or {}))
    else:
        # Otherwise fall back to a plain tf function such as `tf.identity`.
        self._preprocess = getattr(tf, preprocess_name)

    with tf.variable_scope(self._template.variable_scope):
        self._cores = []
        for i, size in enumerate(layers, start=1):
            name = "lstm_{}".format(i)
            init = _get_layer_initializers(initializer, name,
                                           ("w_gates", "b_gates"))
            self._cores.append(nn.LSTM(size, name=name, initializers=init))
        self._rnn = nn.DeepRNN(self._cores, skip_connections=False,
                               name="deep_rnn")

        init = _get_layer_initializers(initializer, "linear", ("w", "b"))
        self._linear = nn.Linear(output_size, name="linear", initializers=init)
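A minimal usage sketch for the constructor above, assuming the learning-to-learn module layout where `LogAndSign` is one of the `l2l.preprocess` classes referenced in the docstring; the sizes and options below are illustrative, not taken from the snippet:

# Hedged example: "LogAndSign" and its "k" option are assumed from the
# l2l.preprocess module mentioned in the docstring; sizes are made up.
optimizer_net = StandardDeepLSTM(
    output_size=1,               # one update value per parameter
    layers=(20, 20),             # two stacked LSTM cores
    preprocess_name="LogAndSign",
    preprocess_options={"k": 5},
    scale=0.01,
    initializer="zeros")         # string form, converted per the docstring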
def __init__(self):
    super(Model, self).__init__()
    # Convolution stack; the arguments follow the pattern
    # (kernel_size, in_channels, out_channels) in this custom nn API.
    self.c1 = nn.Conv2D((6, 6), 1, 10)
    self.c2 = nn.Conv2D((6, 6), 10, 15)
    self.c3 = nn.Conv2D((4, 4), 15, 20)
    self.c4 = nn.Conv2D((3, 3), 20, 25)
    # Recurrent layer and classifier over the flattened 13x13 feature map.
    self.lstm = nn.LSTM(13 * 13, 13 * 13)
    self.dense = nn.Dense(13 * 13, 10)
    # Register submodules so the framework can find their parameters.
    self._parameters = {
        'conv1': self.c1,
        'conv2': self.c2,
        'conv3': self.c3,
        'conv4': self.c4,
        'lstm': self.lstm,
        'dense': self.dense,
    }
def __init__(self, flow, spec, lstm_feature_embeddings, lr_lstm, rl_lstm):
    # Add blobs for the lexical resources.
    lexicon = flow.blob("lexicon")
    lexicon.type = "dict"
    lexicon.add_attr("delimiter", 10)
    lexicon.add_attr("oov", spec.words.oov_index)
    normalization = ""
    if spec.words.normalize_digits:
        normalization = "d"
    lexicon.add_attr("normalization", normalization)
    lexicon.data = str(spec.words) + "\n"
    self.lexicon_blob = lexicon

    def read_file(filename):
        fin = open(filename, "r")
        data = fin.read()
        fin.close()
        return data

    # Serialize the commons store through a temporary file.
    f = tempfile.NamedTemporaryFile(delete=False)
    fname = f.name
    spec.commons.save(fname, binary=True)
    f.close()
    commons = flow.blob("commons")
    commons.type = "frames"
    commons.data = read_file(fname)
    os.unlink(fname)
    self.commons_blob = commons

    suffix = flow.blob("suffixes")
    suffix.type = "affix"
    suffix.data = str(spec.write_suffix_table())
    self.suffix_blob = suffix

    # Add feature extraction related ops.
    bldr = builder.Builder(flow, "features")
    self.feature_ids = []
    concat_args = []
    for f, e in zip(spec.lstm_features, lstm_feature_embeddings):
        shape = [f.vocab_size, f.dim]
        embedding = bldr.var(name=f.name + "_embeddings", shape=shape)
        embedding.data = e
        ids_input = bldr.var(name=f.name, dtype="int32", shape=[1, f.num])
        self.feature_ids.append(ids_input)

        # Multi-id features sum their gathered embedding rows.
        gather_op_type = "Gather"
        if f.num > 1:
            gather_op_type = "GatherSum"
        gather_op = bldr.rawop(gather_op_type)
        gather_op.dtype = "float32"
        gather_op.add_input(embedding)
        gather_op.add_input(ids_input)

        gather_output = bldr.var(gather_op.name + ":0", "float32", [1, f.dim])
        gather_op.add_output(gather_output)
        concat_args.append(gather_output)
    self.feature_vector = bldr.concat(concat_args)
    bldr.rename(self.feature_vector, "feature_vector")
    self.feature_vector.ref = True

    # Add the BiLSTM: one left-to-right and one right-to-left LSTM.
    lr = builder.Builder(flow, "lstm/lr")
    lr_input = lr.var(name="input", shape=[1, spec.lstm_input_dim])
    lr_input.ref = True
    flow_lr_lstm = nn.LSTM(lr, input=lr_input, size=spec.lstm_hidden_dim)
    lr_lstm.copy_to_flow_lstm(flow_lr_lstm)
    self.lr_lstm = flow_lr_lstm

    rl = builder.Builder(flow, "lstm/rl")
    rl_input = rl.var(name="input", shape=[1, spec.lstm_input_dim])
    rl_input.ref = True
    flow_rl_lstm = nn.LSTM(rl, input=rl_input, size=spec.lstm_hidden_dim)
    rl_lstm.copy_to_flow_lstm(flow_rl_lstm)
    self.rl_lstm = flow_rl_lstm

    cnxin = flow.cnx("features")
    cnxin.add(self.feature_vector)
    cnxin.add(lr_input)
    cnxin.add(rl_input)
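The Gather/GatherSum switch above selects a summing gather whenever a feature extracts more than one id. A small numpy illustration of the intended semantics (this is not the SLING kernel itself, just its expected behavior):

import numpy as np

# Illustrative only: Gather keeps one embedding row per id, while
# GatherSum collapses a multi-id feature into a single summed vector.
embedding = np.random.rand(50, 8)        # [f.vocab_size, f.dim]
ids = np.array([3, 17, 4])               # f.num = 3 ids for one feature

gathered = embedding[ids]                # Gather    -> shape (3, 8)
gather_sum = embedding[ids].sum(axis=0)  # GatherSum -> shape (8,)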
def __init__(self):
    super(Model, self).__init__()
    self.lstm = nn.LSTM(10, 10)
    self._parameters = {'lstm': self.lstm}
    return arr


def get_data():
    # Yield (sentence matrix, resized image) training pairs.
    for filename, sents in data.iteritems():
        sent = choice(sents)
        mat = sent2matrix(sent)
        img = cv2.imread(path + filename)
        # 100x100 BGR image flattened to a (300, 100) float matrix.
        rs = cv2.resize(img, (100, 100)).reshape(
            (300, 100)).astype(theano.config.floatX)
        yield (mat, rs)


network = nn.Container()

# Encoder
network.add(nn.LSTM(dict_size, 300, 300))
network.add(nn.LSTM(300, 100, 100))
network.add(nn.LSTM(100, 100, 33))

# Decoder
network.add(nn.Reshape((1, 3, 11, 100)))
network.add(nn.SpatialConvolution((1, 3, 11, 100), (16, 3, 5, 5)))
network.add(nn.ReLU())
network.add(nn.SpatialMaxPooling((2, 2), 48))
network.add(nn.Reshape((2304, )))
network.add(nn.Linear(2304, 30000))
network.add(nn.Reshape((300, 100)))
network.add(nn.MSE(), cost=True)

print 'Network created'
print 'Compiling function'
network.make()
print 'Function created'
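A hedged training-loop sketch for the network above; it assumes this custom theano `nn.Container` exposes `train(x, y)` returning `(cost, output)`, the same interface used by the character-model script below:

# Sketch only: the train() signature is inferred from the sibling script
# below, and n_epochs is an arbitrary illustrative value.
n_epochs = 10
for epoch in xrange(n_epochs):
    for mat, rs in get_data():
        cost, output = network.train(mat, rs)
        print epoch, cost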
import os
from time import time

import nn
import numpy
import theano


def to_data(char):
    # One-hot encode a single character over the byte range.
    arr = numpy.zeros((1, 255), dtype=theano.config.floatX)
    arr[0, ord(char)] = 1
    return arr


network = nn.Container()
network.add(nn.LSTM(255, 100, 100), enc=True)
network.add(nn.LSTM(100, 255, 1), dec=True)
network.add(nn.SoftMax(), dec=True)
network.add(nn.ClassNLL(), cost=True)
print 'Network created'
print 'Compiling functions'
network.make()
print 'Functions created'

path = '../data/'
n_train = 900
for n in xrange(n_train):
    with open(os.path.join(path, '{}.c'.format(n)), 'r') as fi:
        # Append ETX (chr(3)) as an explicit end-of-file marker.
        code = '{}{}'.format(fi.read(), chr(3))
    print n, code, len(code)
    # Next-character prediction: train on consecutive character pairs.
    for c1, c2 in zip(code[:-1], code[1:]):
        x = to_data(c1)
        y = to_data(c2)
        s = time()
        cost, output = network.train(x, y)
        print c1, c2, time() - s, cost, output.argmax(), chr(output.argmax())