def _encoder(self, in_features, encoder_n_hidden, encoder_pre_rnn_layers,
             encoder_post_rnn_layers, forget_gate_bias, norm, rnn_type,
             encoder_stack_time_factor, dropout):
    """Build the encoder sub-modules and return them in a ``ModuleDict``.

    The returned dict has three entries, created in this order:

    * ``"pre_rnn"``: ``rnn(...)`` fed with ``in_features`` inputs and
      ``encoder_pre_rnn_layers`` layers.
    * ``"stack_time"``: ``StackTime(factor=encoder_stack_time_factor)``.
    * ``"post_rnn"``: ``rnn(...)`` with ``encoder_post_rnn_layers`` layers,
      ``norm_first_rnn=True`` and an input size of
      ``encoder_stack_time_factor * encoder_n_hidden``.

    ``rnn_type``, ``encoder_n_hidden``, ``norm``, ``forget_gate_bias`` and
    ``dropout`` are forwarded unchanged to both ``rnn`` calls.
    """
    # Keyword arguments common to both recurrent stacks.
    shared = dict(
        rnn=rnn_type,
        hidden_size=encoder_n_hidden,
        norm=norm,
        forget_gate_bias=forget_gate_bias,
        dropout=dropout,
    )

    pre_rnn = rnn(
        input_size=in_features,
        num_layers=encoder_pre_rnn_layers,
        **shared
    )
    stack_time = StackTime(factor=encoder_stack_time_factor)
    post_rnn = rnn(
        input_size=encoder_stack_time_factor * encoder_n_hidden,
        num_layers=encoder_post_rnn_layers,
        norm_first_rnn=True,
        **shared
    )

    return torch.nn.ModuleDict({
        "pre_rnn": pre_rnn,
        "stack_time": stack_time,
        "post_rnn": post_rnn,
    })
def _encode(self):
    """Encode the embedded inputs: rnn -> self-attention -> concat -> rnn.

    Reads ``self.embedded_inputs``, ``self.placeholders``, ``self.hidden_size``
    and ``self.window_size``; writes ``self.encoded_sent``,
    ``self.attn_outputs`` and ``self.attn_weights``. Dropout with the
    ``'dropout_keep_prob'`` placeholder is applied after each stage.
    """
    # NOTE(review): the original source was collapsed onto one line, so which
    # statements sit inside each variable scope is reconstructed; confirm
    # against a checkpoint before relying on variable names.
    lengths = self.placeholders['input_length']
    keep_prob = self.placeholders['dropout_keep_prob']

    with tf.variable_scope('rnn_1'):
        first_pass, _ = rnn(
            'bi-lstm', self.embedded_inputs, lengths,
            hidden_size=self.hidden_size, layer_num=1, concat=True)
        self.encoded_sent = first_pass
        self.encoded_sent = tf.nn.dropout(self.encoded_sent, keep_prob)

    attended, weights = self_attention(
        self.encoded_sent, lengths, self.window_size)
    self.attn_outputs, self.attn_weights = attended, weights
    self.attn_outputs = tf.nn.dropout(self.attn_outputs, keep_prob)

    # Join the recurrent features and the attention features on the last axis.
    self.encoded_sent = tf.concat(
        [self.encoded_sent, self.attn_outputs], -1)

    with tf.variable_scope('rnn_2'):
        second_pass, _ = rnn(
            'bi-lstm', self.encoded_sent, lengths,
            hidden_size=self.hidden_size, layer_num=1, concat=True)
        self.encoded_sent = second_pass
        self.encoded_sent = tf.nn.dropout(self.encoded_sent, keep_prob)
def __init__(self, in_features, encoder_n_hidden, encoder_pre_rnn_layers,
             encoder_post_rnn_layers, forget_gate_bias, norm, rnn_type,
             encoder_stack_time_factor, dropout):
    """Create the three encoder stages as attributes of this module.

    ``self.pre_rnn`` takes ``in_features`` inputs, ``self.stack_time`` is a
    ``StackTime`` with factor ``encoder_stack_time_factor``, and
    ``self.post_rnn`` takes ``encoder_stack_time_factor * encoder_n_hidden``
    inputs and is built with ``norm_first_rnn=True``. The remaining
    arguments are forwarded unchanged to both ``rnn`` calls.
    """
    super().__init__()

    # Options that are the same for the pre- and post-stacking RNNs.
    common = {
        "rnn": rnn_type,
        "hidden_size": encoder_n_hidden,
        "norm": norm,
        "forget_gate_bias": forget_gate_bias,
        "dropout": dropout,
    }

    self.pre_rnn = rnn(
        input_size=in_features,
        num_layers=encoder_pre_rnn_layers,
        **common
    )
    self.stack_time = StackTime(factor=encoder_stack_time_factor)

    post_input_size = encoder_stack_time_factor * encoder_n_hidden
    self.post_rnn = rnn(
        input_size=post_input_size,
        num_layers=encoder_post_rnn_layers,
        norm_first_rnn=True,
        **common
    )
def las(config):
    """Assemble the ``las`` model dict from ``config`` and return it.

    Layers are added by the project builders in a fixed order, each one
    receiving the growing ``model`` dict, ``config``, its own name and the
    name(s) of its input layer. Afterwards the dict gains ``'loss'``
    (the ``'ce_loss'`` entry), ``'step'``, ``'lrate'`` (exponential decay
    driven by the ``[global]`` options ``lrate``/``dstep``/``drate``) and
    ``'optim'`` (the ``tf.train`` optimizer named by ``[global] optim``).
    """
    # (builder, own name, input layer names), applied top to bottom.
    pipeline = (
        (rnn.rnn, 'blstm1', ()),
        (py.py, 'py1', ('blstm1',)),
        (rnn.rnn, 'blstm2', ('py1',)),
        (py.py, 'py2', ('blstm2',)),
        (rnn.rnn, 'blstm3', ('py2',)),
        (s2s.s2s, 's2s', ('blstm3',)),
        (fnn.fnn, 'map', ('s2s',)),
        (ce.ce, 'ce', ('map',)),
    )

    model = dict()
    for build, layer_name, inputs in pipeline:
        model = build(model, config, layer_name, *inputs)

    model['loss'] = model['ce_loss']
    model['step'] = tf.Variable(0, trainable=False, name='step')

    initial_rate = config.getfloat('global', 'lrate')
    decay_steps = config.getint('global', 'dstep')
    decay_rate = config.getfloat('global', 'drate')
    model['lrate'] = tf.train.exponential_decay(
        initial_rate, model['step'], decay_steps, decay_rate,
        staircase=False, name='lrate')

    # The optimizer class is looked up by name on tf.train.
    optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
    model['optim'] = optimizer_cls(model['lrate']).minimize(
        model['loss'], global_step=model['step'], name='optim')
    return model
def __init__(self):
    """Build the ``rnn`` model with fixed settings and restore its weights.

    The model is stored on ``self.nn``; ``restore()`` is called on it
    immediately. ``TF_CPP_MIN_LOG_LEVEL`` is set to ``'2'`` beforehand.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Output locations.
    locations = dict(
        log_dir='logs',
        checkpoint_dir='checkpoints',
        prediction_dir='predictions',
    )
    # Per-stage training schedule (three stages).
    schedule = dict(
        learning_rates=[.0001, .00005, .00002],
        batch_sizes=[32, 64, 64],
        patiences=[1500, 1000, 500],
        beta1_decays=[.9, .9, .9],
    )
    # Everything else.
    settings = dict(
        validation_batch_size=32,
        optimizer='rms',
        num_training_steps=100000,
        warm_start_init_step=17900,
        regularization_constant=0.0,
        keep_prob=1.0,
        enable_parameter_averaging=False,
        min_steps_to_checkpoint=2000,
        log_interval=20,
        logging_level=logging.CRITICAL,
        grad_clip=10,
        lstm_size=400,
        output_mixture_components=20,
        attention_mixture_components=10,
    )
    settings.update(locations)
    settings.update(schedule)

    self.nn = rnn(**settings)
    self.nn.restore()
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=tf.float32, scope=None):
    """Sequence-to-sequence RNN whose encoder and decoder share parameters.

    The encoder RNN is run over ``encoder_inputs``; its last state seeds the
    decoder, which is run over ``decoder_inputs``. Both use the same ``cell``
    under the same scope name, with variable reuse switched on in between.

    Args:
      encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      loop_function: if not None, applied to the i-th output to produce the
        (i+1)-th input; decoder_inputs are then ignored except for the first
        element ("GO" symbol). See rnn_decoder for details.
      dtype: dtype of the initial state of the rnn cell (default: tf.float32).
      scope: VariableScope for the created subgraph; default:
        "tied_rnn_seq2seq".

    Returns:
      outputs: a list, as long as decoder_inputs, of 2D Tensors with shape
        [batch_size x cell.output_size] containing the generated outputs.
      states: the state of each decoder cell at each time-step; a list of
        len(decoder_inputs) 2D Tensors of shape [batch_size x cell.state_size].
    """
    inner_scope = scope or "tied_rnn_seq2seq"

    with tf.variable_scope("combined_tied_rnn_seq2seq"):
        _, enc_states = rnn.rnn(
            cell, encoder_inputs, dtype=dtype, scope=inner_scope)

        # From here on the decoder picks up the encoder's variables.
        tf.get_variable_scope().reuse_variables()

        last_encoder_state = enc_states[-1]
        return rnn_decoder(
            decoder_inputs, last_encoder_state, cell,
            loop_function=loop_function, scope=inner_scope)
def basic_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                      dtype=tf.float32, scope=None):
    """Plain sequence-to-sequence RNN (no parameter sharing).

    The encoder RNN is run over ``encoder_inputs``; its last state seeds the
    decoder, which is run over ``decoder_inputs``. Encoder and decoder use the
    same cell type but do not share parameters.

    Args:
      encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      dtype: dtype of the initial state of the RNN cell (default: tf.float32).
      scope: VariableScope for the created subgraph; default:
        "basic_rnn_seq2seq".

    Returns:
      outputs: a list, as long as decoder_inputs, of 2D Tensors with shape
        [batch_size x cell.output_size] containing the generated outputs.
      states: the state of each decoder cell at each time-step; a list of
        len(decoder_inputs) 2D Tensors of shape [batch_size x cell.state_size].
    """
    scope_name = scope or "basic_rnn_seq2seq"
    with tf.variable_scope(scope_name):
        _, enc_states = rnn.rnn(cell, encoder_inputs, dtype=dtype)
        final_state = enc_states[-1]
        return rnn_decoder(decoder_inputs, final_state, cell)
def __init__(self, checkpoint_dir=None):
    """Build the ``rnn`` model with fixed settings and restore its weights.

    Args:
        checkpoint_dir: directory the model restores its checkpoint from.
            When omitted, the path that used to be hard-coded here is used,
            so existing callers behave exactly as before. Pass a path to
            run on a machine where that directory does not exist.

    The model is stored on ``self.nn`` and ``restore()`` is called on it
    immediately. ``TF_CPP_MIN_LOG_LEVEL`` is set to ``'2'`` beforehand.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    if checkpoint_dir is None:
        # Historical default. Other checkpoints previously used here:
        #   /home/martin/work/thesis/ext/handwriting-synthesis/checkpoints
        #   /home/martin/work/thesis/repo/results/handwriting-synthesis/1564411620_iam_online
        checkpoint_dir = (
            '/home/martin/work/thesis/repo/results/handwriting-synthesis/'
            '1564954698_ordertests_mean'
        )

    self.nn = rnn(
        log_dir='logs',
        checkpoint_dir=checkpoint_dir,
        prediction_dir=None,
        learning_rates=[.0001, .00005, .00002],
        batch_sizes=[32, 64, 64],
        patiences=[1500, 1000, 500],
        beta1_decays=[.9, .9, .9],
        validation_batch_size=32,
        optimizer='rms',
        num_training_steps=100000,
        warm_start_init_step=17900,
        regularization_constant=0.0,
        keep_prob=1.0,
        enable_parameter_averaging=False,
        min_steps_to_checkpoint=2000,
        log_interval=20,
        logging_level=logging.CRITICAL,
        grad_clip=10,
        lstm_size=400,
        output_mixture_components=20,
        attention_mixture_components=10,
    )
    self.nn.restore()
def __init__(self):
    """Build the ``rnn`` model and restore it from the bundled checkpoint.

    The checkpoint directory is ``../checkpoints/graves/iam_online`` relative
    to the directory containing this file (symlinks resolved). The model is
    stored on ``self.nn`` and ``restore()`` is called on it immediately.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    here = os.path.dirname(os.path.realpath(__file__))
    checkpoint_dir = os.path.join(
        here, '..', 'checkpoints', "graves", "iam_online")

    model_options = {
        'log_dir': 'logs',
        'checkpoint_dir': checkpoint_dir,
        'prediction_dir': None,
        'learning_rates': [.0001, .00005, .00002],
        'batch_sizes': [32, 64, 64],
        'patiences': [1500, 1000, 500],
        'beta1_decays': [.9, .9, .9],
        'validation_batch_size': 32,
        'optimizer': 'rms',
        'num_training_steps': 100000,
        'warm_start_init_step': 17900,
        'regularization_constant': 0.0,
        'keep_prob': 1.0,
        'enable_parameter_averaging': False,
        'min_steps_to_checkpoint': 2000,
        'log_interval': 20,
        'logging_level': logging.CRITICAL,
        'grad_clip': 10,
        'lstm_size': 400,
        'output_mixture_components': 20,
        'attention_mixture_components': 10,
    }
    self.nn = rnn(**model_options)
    self.nn.restore()
def RNN(x, weights, biases):
    """Run a 2-layer GRU over ``x`` and project the last step's output.

    Args:
        x: input tensor of shape (batch_size, n_steps, n_input).
        weights: mapping whose ``'out'`` entry is the projection matrix.
        biases: mapping whose ``'out'`` entry is the projection bias.

    Returns:
        A pair ``(projection, last_output)`` where ``last_output`` is the
        RNN output of the final time step and ``projection`` is
        ``last_output @ weights['out'] + biases['out']``.

    Uses the module-level ``n_input``, ``n_steps`` and ``n_hidden``.
    """
    # `rnn.rnn` wants a list of n_steps tensors of shape (batch_size, n_input):
    # make the input time-major, flatten it, then cut it into n_steps pieces.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(tensor=x, shape=[-1, n_input])
    x = tf.split(value=x, num_or_size_splits=n_steps, axis=0)

    # One cell object per layer. Repeating a single instance
    # (`[cell] * 2`) makes both layers the same object, which newer
    # TensorFlow 1.x releases refuse when the layers get their own scopes.
    # Dropout on the outputs is there to limit overfitting.
    layers = [
        rnn_cell.DropoutWrapper(rnn_cell.GRUCell(n_hidden),
                                output_keep_prob=0.5)
        for _ in range(2)
    ]
    stacked_cell = rnn_cell.MultiRNNCell(layers)

    outputs, _ = rnn.rnn(cell=stacked_cell, inputs=x, dtype=tf.float32)

    # Linear activation on the last output of the RNN loop.
    last_output = outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out'], last_output
def run_rnn():
    """Create an RNN for the digit images, then train and test it.

    The flat training images are assumed to be square: each row of an image
    is fed as one time step. Uses the module-level ``data_path`` and
    ``nn_utilities_obj``.
    """
    rnn_obj = rnn(data_path)

    # Input data (the fashion data set can be loaded here instead via
    # nn_utilities_obj.load_fashion_data()).
    input_data = nn_utilities_obj.prepare_digits_image_inputs()

    # Override the default learning rate.
    rnn_obj.learning_rate_var = 0.0005

    # Side length of the (assumed square) image.
    side = int(np.sqrt(input_data["x_train"].shape[1]))
    image_height = image_width = side

    # Network parameters.
    num_input = image_height   # values per time step (one image row)
    timesteps = image_width    # number of time steps
    num_hidden = 128           # hidden layer size
    num_classes = 10           # digits 0-9

    optimizer, cost, accuracy, rnn_model = rnn_obj.create_model(
        num_input, timesteps, num_hidden, num_classes)

    # Turn each flat sample into a (timesteps, num_input) sequence.
    for split in ("x_train", "x_validation"):
        samples = input_data[split]
        input_data[split] = np.reshape(
            samples, [samples.shape[0], timesteps, num_input])

    run_nn(rnn_obj, input_data, optimizer, cost, accuracy, rnn_model,
           "rnn/" + input_data["name"])
def dsd(config):
    """Assemble the ``dsd`` model dict from ``config`` and return it.

    Layers are added by the project builders in a fixed order, each one
    receiving the growing ``model`` dict, ``config``, its own name and the
    name(s) of its input layer. Afterwards the dict gains ``'loss'``
    (the ``'ctc_loss'`` entry), ``'step'``, ``'lrate'`` (exponential decay
    driven by the ``[global]`` options ``lrate``/``dstep``/``drate``) and
    ``'optim'`` (the ``tf.train`` optimizer named by ``[global] optim``).
    """
    # (builder, own name, input layer names), applied top to bottom.
    # NOTE(review): the name 'ctc' is used twice (on 'rnn' and on 'fnns');
    # confirm the second call is not meant to have its own name.
    pipeline = (
        (cnn.cnn, 'cnn', ()),
        (rnn.rnn, 'rnn', ('cnn',)),
        (ctc.ctc, 'ctc', ('rnn',)),
        (fnn.fnn, 'fnns', ('rnn',)),
        (ctc.ctc, 'ctc', ('fnns',)),
        (fnn.fnn, 'fnnd', ('rnn',)),
        (ces.ces, 'ces', ('fnnd',)),
        (dia.dia, 'dia', ('fnnd', 'rnn')),
        (ced.ced, 'ced', ('dia',)),
    )

    model = dict()
    for build, layer_name, inputs in pipeline:
        model = build(model, config, layer_name, *inputs)

    model['loss'] = model['ctc_loss']
    model['step'] = tf.Variable(0, trainable=False, name='step')

    initial_rate = config.getfloat('global', 'lrate')
    decay_steps = config.getint('global', 'dstep')
    decay_rate = config.getfloat('global', 'drate')
    model['lrate'] = tf.train.exponential_decay(
        initial_rate, model['step'], decay_steps, decay_rate,
        staircase=False, name='lrate')

    # The optimizer class is looked up by name on tf.train.
    optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
    model['optim'] = optimizer_cls(model['lrate']).minimize(
        model['loss'], global_step=model['step'], name='optim')
    return model
def main():
    """Load and prepare the corpus, then train and score the "vg" model.

    Steps: load the three splits, prune rare words, apply the cutoff,
    convert every split to vocabulary indices, wrap inputs and targets in
    ``VG`` objects, train ``rnn.rnn`` on the training split and print its
    accuracy on the test split.
    """
    min_occurrences = 15
    cutoff = 100  # other values tried: numpy.inf, 1000
    hidden_size = 100
    num_epochs = 5

    print("loading data...", end="", flush=True)
    train, dev, test = load_data()
    print("done.")

    print("pruning words with occurances < %s..." % min_occurrences,
          end="", flush=True)
    vocab, train, dev, test = prune_words(
        train, dev, test, prune_occurances_lt=min_occurrences)
    vocab_size = len(vocab)
    train, dev, test = cutoff_data(train, dev, test, cutoff=cutoff)
    print("done. vocab size: %s" % len(vocab))

    print("vectorizing data with cutoff: %s..." % cutoff,
          end="", flush=True)
    train_is, dev_is, test_is = convert_all_data_to_vocab_indices(
        train, dev, test, vocab)
    print("done.")

    print("Checking data formats...")

    def as_vg_pair(split):
        # split is an (inputs, targets) pair of index data.
        return VG(split[0], vocab_size), VG(split[1], vocab_size)

    # A CSR-vectorized variant of the data (make_vectorized) and the
    # cross-checks against it (check_inverse_indices, check_data_format)
    # are disabled; only the index-based data is built.
    X_train_is, Y_train_is = as_vg_pair(train_is)
    X_dev_is, Y_dev_is = as_vg_pair(dev_is)
    X_test_is, Y_test_is = as_vg_pair(test_is)
    print("done.")

    # The matching CSR model (lm_csr) is disabled as well.
    print("training vg model for %s epoch(s)..." % num_epochs)
    lm_vg = rnn.rnn(len(vocab), len(vocab), hidden_size=hidden_size, seed=10)
    lm_vg.train(X_train_is, Y_train_is, verbose=2, epochs=num_epochs)

    acc_vg = test_model(lm_vg, X_test_is, Y_test_is)
    print("vg acc: {0:.3f}".format(acc_vg))
def getModel(params):
    """Return the model selected by ``params["model_name"]``.

    The model's source directory (``global_params.py_models_path`` formatted
    with the model name) is pushed to the front of ``sys.path`` first, also
    when the name turns out to be unknown.

    Raises:
        ValueError: if the model name is anything other than ``"rnn"``.
    """
    models_dir = global_params.py_models_path.format(params["model_name"])
    sys.path.insert(0, models_dir)

    if params["model_name"] != "rnn":
        raise ValueError("model not found.")

    # Imported late: the module only becomes importable after the path
    # insertion above.
    import rnn as model
    return model.rnn(params)
def las(config):
    """Build and return the ``las`` model dict described by ``config``.

    Three ``rnn`` layers ('blstm1'..'blstm3') are interleaved with two ``py``
    layers ('py1', 'py2'), followed by 's2s', 'map' (``fnn``) and 'ce'.
    The training entries ``'loss'``, ``'step'``, ``'lrate'`` and ``'optim'``
    are then added, configured from the ``[global]`` section of ``config``.
    """
    def add(builder, model, *names):
        # Every builder takes (model, config, own name[, input name]).
        return builder(model, config, *names)

    model = dict()
    model = add(rnn.rnn, model, 'blstm1')
    model = add(py.py, model, 'py1', 'blstm1')
    model = add(rnn.rnn, model, 'blstm2', 'py1')
    model = add(py.py, model, 'py2', 'blstm2')
    model = add(rnn.rnn, model, 'blstm3', 'py2')
    model = add(s2s.s2s, model, 's2s', 'blstm3')
    model = add(fnn.fnn, model, 'map', 's2s')
    model = add(ce.ce, model, 'ce', 'map')

    step = tf.Variable(0, trainable=False, name='step')
    model['loss'] = model['ce_loss']
    model['step'] = step

    # Learning rate decays exponentially with the global step.
    model['lrate'] = tf.train.exponential_decay(
        config.getfloat('global', 'lrate'),
        step,
        config.getint('global', 'dstep'),
        config.getfloat('global', 'drate'),
        staircase=False,
        name='lrate',
    )

    make_optimizer = getattr(tf.train, config.get('global', 'optim'))
    optimizer = make_optimizer(model['lrate'])
    model['optim'] = optimizer.minimize(
        model['loss'], global_step=step, name='optim')
    return model
def predict(param=PARAMS, sv=SOLVE, small=False):
    """Load a saved network and run ``Solver.predict`` with it.

    Args:
        param: keyword options forwarded to ``Solver``; defaults to the
            module-level ``PARAMS``.
        sv: solver settings; defaults to the module-level ``SOLVE``.
        small: accepted for interface compatibility; not used here.

    Both dicts are shallow-copied before being modified, so neither the
    shared module-level defaults nor a caller's own dicts are changed.
    """
    # Work on copies: writing 'load', 'name', 'eval_data', ... straight into
    # the default dicts would leak them into every later call in the process.
    param = dict(param)
    sv = dict(sv)

    sv['load'] = True
    # NOTE(review): 'load_perfix' spelling kept as-is; presumably the key
    # Solver reads -- confirm before renaming.
    sv['load_perfix'], sv['load_epoch'] = Slow200
    sv['name'] = 'Pred'

    net = rnn.rnn()
    out = get(1, rate=0.1)
    train, param['eval_data'] = out['train'], out['val']
    param['marks'] = param['e_marks'] = out['marks']

    s = Solver(net, train, sv, **param)
    s.predict()
def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):
    """Run the encoder RNN, then the decoder seeded with its final state.

    When ``feed_previous`` is truthy the decoder gets a loop function that
    turns each output into the next input: the output is projected with
    ``self.w_softmax``/``self.b_softmax`` and replaced by a float tensor
    holding 1.0 wherever a row reaches its maximum and 0.0 elsewhere.
    Otherwise no loop function is passed.

    Returns whatever ``seq2seq.rnn_decoder`` returns.
    """
    def inference_loop_function(prev, _):
        projected = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
        row_max = tf.reduce_max(
            projected, reduction_indices=[1], keep_dims=True)
        return tf.to_float(tf.equal(projected, row_max))

    if feed_previous:
        loop_function = inference_loop_function
    else:
        loop_function = None

    with variable_scope.variable_scope('seq2seq'):
        _, final_enc_state = rnn.rnn(
            cell, encoder_inputs, dtype=dtypes.float32)
        return seq2seq.rnn_decoder(
            decoder_inputs, final_enc_state, cell,
            loop_function=loop_function)
def predict(param=PARAMS, sv=SOLVE, small=False):
    """Restore a trained network and run prediction through ``Solver``.

    Args:
        param: keyword options forwarded to ``Solver`` (default: the
            module-level ``PARAMS``).
        sv: solver settings (default: the module-level ``SOLVE``).
        small: unused; kept so existing callers keep working.

    The function edits private shallow copies of ``param`` and ``sv``. The
    dicts passed in (including the shared defaults) are left untouched, so
    settings such as ``'load'`` do not carry over to later calls.
    """
    # Copy first: the defaults are shared module-level objects.
    sv = dict(sv)
    param = dict(param)

    # NOTE(review): 'load_perfix' spelling kept as-is; presumably the key
    # Solver reads -- confirm before renaming.
    prefix, epoch = Slow200
    sv.update({
        'load': True,
        'load_perfix': prefix,
        'load_epoch': epoch,
        'name': 'Pred',
    })

    net = rnn.rnn()
    out = get(1, rate=0.1)
    train = out['train']
    param['eval_data'] = out['val']
    param['marks'] = param['e_marks'] = out['marks']

    solver = Solver(net, train, sv, **param)
    solver.predict()
def __init__(self, checkpoint_dir='./checkpoints', bias=0.75,
             styles_dir='./styles', default_style=None,
             chars_from='áéíóúñÁÉÍÓÚÑ\t', chars_to='aeiounAEIOUN ',
             chars_erase='\r', verbose=False):
    """Store the options, restore the ``rnn`` model and set up char mapping.

    Args:
        checkpoint_dir: directory the model is restored from.
        bias: stored on ``self.bias``.
        styles_dir: stored on ``self.styles_dir``.
        default_style: stored as both ``self.default_style`` and the
            current ``self.style``.
        chars_from, chars_to, chars_erase: forwarded to
            ``self._update_char_trans_d`` once the model is restored.
        verbose: stored on ``self.verbose``.
    """
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

    # Plain option storage.
    self.checkpoint_dir = checkpoint_dir
    self.styles_dir = styles_dir
    self.bias = bias
    self.verbose = verbose
    self.default_style = default_style
    self.style = default_style
    self.valid_char_set = set(drawing.alphabet)

    model_options = dict(
        log_dir='logs',
        checkpoint_dir=checkpoint_dir,
        prediction_dir='predictions',
        learning_rates=[.0001, .00005, .00002],
        batch_sizes=[32, 64, 64],
        patiences=[1500, 1000, 500],
        beta1_decays=[.9, .9, .9],
        validation_batch_size=32,
        optimizer='rms',
        num_training_steps=100000,
        warm_start_init_step=17900,
        regularization_constant=0.0,
        keep_prob=1.0,
        enable_parameter_averaging=False,
        min_steps_to_checkpoint=2000,
        log_interval=20,
        logging_level=logging.CRITICAL,
        grad_clip=10,
        lstm_size=400,
        output_mixture_components=20,
        attention_mixture_components=10,
    )
    self.nn = rnn(**model_options)
    self.nn.restore()

    self._update_char_trans_d(
        chars_from=chars_from, chars_to=chars_to, chars_erase=chars_erase)
def __init__(self, vocab_size, n_hidden, pred_rnn_layers, forget_gate_bias,
             norm, rnn_type, dropout):
    """Create the embedding table and the decoder RNN.

    ``self.embed`` has ``vocab_size - 1`` rows of width ``n_hidden``;
    ``self.dec_rnn`` is an ``rnn(...)`` with ``pred_rnn_layers`` layers whose
    input and hidden sizes are both ``n_hidden``. ``n_hidden`` is also kept
    on ``self.n_hidden``.
    """
    super().__init__()
    self.n_hidden = n_hidden

    # One row fewer than the vocabulary size.
    num_embeddings = vocab_size - 1
    self.embed = torch.nn.Embedding(num_embeddings, n_hidden)

    rnn_options = dict(
        rnn=rnn_type,
        input_size=n_hidden,
        hidden_size=n_hidden,
        num_layers=pred_rnn_layers,
        norm=norm,
        forget_gate_bias=forget_gate_bias,
        dropout=dropout,
    )
    self.dec_rnn = rnn(**rnn_options)
def train_nn(self, epochs=5000, batch_size=32, name="model", smoothing_factor=1):
    """Train and test the RNN, keep its prediction and plot a smoothed copy.

    Args:
        epochs: forwarded to ``rnn.rnn``.
        batch_size: forwarded to ``rnn.rnn``.
        name: forwarded to ``rnn.rnn``.
        smoothing_factor: sigma of the Gaussian filter applied to the
            prediction before plotting.

    The unsmoothed prediction is stored on ``self.prediction``; the smoothed
    one is only drawn (green line labelled 'nn output').
    """
    # Fit the scaler on the training inputs only, then apply that same
    # scaling to the test inputs. Re-fitting on the test set would scale it
    # with its own statistics and overwrite the training fit.
    x_train = self.scaler.fit_transform(self.x_train)
    x_test = self.scaler.transform(self.x_test)

    nn_prediction = rnn.rnn(x_train, x_test, self.y_train, self.y_test,
                            self.offset, epochs, batch_size, name)
    self.prediction = nn_prediction

    smoothed = scipy.ndimage.gaussian_filter(nn_prediction, smoothing_factor)
    plt.plot(smoothed, label='nn output', color="green")
def ds2(config):
    """Assemble the ``ds2`` model dict from ``config`` and return it.

    Layers are chained as cnn -> rnn -> fnn -> ctc, each builder receiving
    the growing ``model`` dict, ``config``, its own name and the name of its
    input layer. Afterwards the dict gains ``'loss'`` (the ``'ctc_loss'``
    entry), ``'step'``, ``'lrate'`` (exponential decay driven by the
    ``[global]`` options ``lrate``/``dstep``/``drate``) and ``'optim'``
    (the ``tf.train`` optimizer named by ``[global] optim``).
    """
    # (builder, own name, input layer names), applied top to bottom.
    pipeline = (
        (cnn.cnn, 'cnn', ()),
        (rnn.rnn, 'rnn', ('cnn',)),
        (fnn.fnn, 'fnn', ('rnn',)),
        (ctc.ctc, 'ctc', ('fnn',)),
    )

    model = dict()
    for build, layer_name, inputs in pipeline:
        model = build(model, config, layer_name, *inputs)

    model['loss'] = model['ctc_loss']
    model['step'] = tf.Variable(0, trainable=False, name='step')

    initial_rate = config.getfloat('global', 'lrate')
    decay_steps = config.getint('global', 'dstep')
    decay_rate = config.getfloat('global', 'drate')
    model['lrate'] = tf.train.exponential_decay(
        initial_rate, model['step'], decay_steps, decay_rate,
        staircase=False, name='lrate')

    # The optimizer class is looked up by name on tf.train.
    optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
    model['optim'] = optimizer_cls(model['lrate']).minimize(
        model['loss'], global_step=model['step'], name='optim')
    return model
def _predict(self, vocab_size, pred_n_hidden, pred_rnn_layers,
             forget_gate_bias, norm, rnn_type, dropout):
    """Build the prediction sub-modules and return them in a ``ModuleDict``.

    The dict holds ``"embed"``, an embedding with ``vocab_size - 1`` rows of
    width ``pred_n_hidden``, and ``"dec_rnn"``, an ``rnn(...)`` with
    ``pred_rnn_layers`` layers whose input and hidden sizes are both
    ``pred_n_hidden``.
    """
    embed = torch.nn.Embedding(vocab_size - 1, pred_n_hidden)

    rnn_options = dict(
        rnn=rnn_type,
        input_size=pred_n_hidden,
        hidden_size=pred_n_hidden,
        num_layers=pred_rnn_layers,
        norm=norm,
        forget_gate_bias=forget_gate_bias,
        dropout=dropout,
    )
    dec_rnn = rnn(**rnn_options)

    return torch.nn.ModuleDict({"embed": embed, "dec_rnn": dec_rnn})
def train(param=PARAMS, sv=SOLVE, small=False):
    """Interactively name the run, then train a fresh network and predict.

    The user is asked 'Are you testing now? '. If the answer contains
    ``'no'`` the run gets no ``'name'`` entry; otherwise the name is
    ``'TEST'`` followed by the answer.

    Args:
        param: keyword options forwarded to ``Solver``; defaults to the
            module-level ``PARAMS``.
        sv: solver settings; defaults to the module-level ``SOLVE``.
        small: accepted for interface compatibility; not used here
            (``get`` is always called with ``small=True``).

    Both dicts are shallow-copied before being modified, so neither the
    shared module-level defaults nor a caller's own dicts are changed.
    """
    # Work on copies: popping/adding keys on the default dicts would change
    # them for every later call in the same process.
    param = dict(param)
    sv = dict(sv)

    # `raw_input` only exists on Python 2; on Python 3 `input` is the
    # equivalent (returns the typed text without evaluating it).
    try:
        ask = raw_input
    except NameError:
        ask = input

    sv['name'] = 'TEST'
    input_var = ask('Are you testing now? ')
    if 'no' in input_var:
        sv.pop('name')
    else:
        sv['name'] += input_var

    net = rnn()
    out = get(2, rate=0.2, small=True)
    train, param['eval_data'] = out['train'], out['val']
    param['marks'] = param['e_marks'] = out['marks']

    s = Solver(net, train, sv, **param)
    s.train()
    s.predict()
def LSTM(self, name, _X, _istate):
    """Step one LSTM cell through the sequence and split its last output.

    Args:
        name: not used.
        _X: input of shape (batchsize, nsteps, len_vec).
        _istate: initial cell state.

    Returns:
        ``(batch_pred_feats, batch_pred_coords, output_state)``: columns
        0..4095 and columns 4097..4100 of the last step's output, plus the
        cell state as it was after the FIRST step.
    """
    # (batchsize, nsteps, len_vec) -> (nsteps, batchsize, len_vec)
    time_major = tf.transpose(_X, [1, 0, 2])
    # -> (nsteps * batchsize, len_vec)
    flat = tf.reshape(time_major, [self.nsteps * self.batchsize, self.len_vec])
    # -> nsteps pieces of (batchsize, len_vec)
    step_inputs = tf.split(0, self.nsteps, flat)

    lstm_cell = tf.nn.rnn_cell.LSTMCell(
        self.len_vec, self.len_vec, state_is_tuple=False)

    state = _istate
    for step in range(self.nsteps):
        pred, state = rnn.rnn(
            lstm_cell, [step_inputs[step]], state, dtype=tf.float32)
        # Later steps reuse the variables created by the first one.
        tf.get_variable_scope().reuse_variables()
        if step == 0:
            output_state = state

    last_output = pred[0]
    batch_pred_feats = last_output[:, 0:4096]
    # NOTE(review): column 4096 is skipped on purpose or by accident --
    # confirm against the layout of the input vector.
    batch_pred_coords = last_output[:, 4097:4101]
    return batch_pred_feats, batch_pred_coords, output_state
def __init__(self):
    """Load the data, build the encoders, split off validation, make the model.

    Uses the module-level ``STRATIFY_DATA``, ``VAL_RATIO`` and
    ``LEARNING_RATE``. The word and label encoders are built from the full
    data set, before the last ``VAL_RATIO`` share is moved to
    ``self.val_data``/``self.val_labels``.
    """
    # Make this arbitrarily large to use the whole data set.
    datasize = 250

    print("Loading data...")
    if STRATIFY_DATA:
        loaded = dataset.get_stratified_data(datasize, shuffle=True)
    else:
        loaded = dataset.get_data(datasize)
    self.data, self.labels = loaded

    print("Building encoder...")
    self.data_encoder = OHencoder.map_to_int_ids(self.data, threshold=4)
    self.label_encoder = OHencoder.map_to_int_ids([self.labels])

    # Shift every word id up by one: index 0 of the one-hot vector is
    # reserved for unknown words.
    for word in self.data_encoder:
        self.data_encoder[word] += 1

    # Train / validation split; the tail of the data is the validation set.
    split_idx = int(len(self.data) * (1 - VAL_RATIO))
    self.val_data, self.data = self.data[split_idx:], self.data[:split_idx]
    self.val_labels, self.labels = (
        self.labels[split_idx:], self.labels[:split_idx])

    # Reverse lookups: vector index -> word, and vector index -> label.
    self.data_decoder = {
        index: word for word, index in self.data_encoder.items()}
    self.label_decoder = {
        index: label for label, index in self.label_encoder.items()}

    self.num_classes = len(self.label_encoder)

    # One extra input slot for the reserved index 0.
    input_size = len(self.data_encoder) + 1
    self.model = rnn(input_size, [128], [128], self.num_classes)

    self.best_acc = 0
    self.criterion = nn.CrossEntropyLoss()
    self.optimizer = torch.optim.Adam(
        self.model.parameters(), lr=LEARNING_RATE)
def main():
    """Train a small network to map each one-hot vector to the next one.

    Input row ``i`` is one-hot at position ``i``; its target is one-hot at
    position ``(i + 1) % 10``. The loss is printed after every training
    pass; training stops early (and nothing else is printed) if it becomes
    NaN. Otherwise each input is printed next to the network's prediction.
    """
    num_features = 10
    output_size = 10
    hidden_size = 10
    num_iterations = 1000  # 400 was also tried

    positions = numpy.arange(10)
    X = numpy.zeros((10, 10))
    Y = numpy.zeros((10, 10))
    X[positions, positions] = 1
    Y[positions, (positions + 1) % 10] = 1

    net = rnn.rnn(num_features, output_size, hidden_size=hidden_size, seed=10)

    for _ in range(num_iterations):
        net.train([X], [Y])
        loss = net.loss_function([X], [Y])
        print(loss)
        if numpy.isnan(loss):
            # Diverged; net.V / net.W / net.U are the things to inspect.
            return

    predictions = net.predict(X)
    for x, y in zip(X, predictions):
        print("%s -> %s" % (x, y))
def train(param=PARAMS, sv=SOLVE, small=False):
    """Ask how to name the run, then train a new network and predict.

    The prompt 'Are you testing now? ' decides the run name: an answer
    containing ``'no'`` removes the ``'name'`` setting, any other answer is
    appended to ``'TEST'``.

    Args:
        param: keyword options forwarded to ``Solver`` (default: the
            module-level ``PARAMS``).
        sv: solver settings (default: the module-level ``SOLVE``).
        small: unused; kept so existing callers keep working (``get`` is
            always called with ``small=True``).

    The function edits private shallow copies of ``param`` and ``sv``. The
    dicts passed in (including the shared defaults) are left untouched.
    """
    # Copy first: the defaults are shared module-level objects.
    sv = dict(sv)
    param = dict(param)

    # Python 2 spells the line-reading prompt `raw_input`; Python 3 calls
    # the same thing `input`.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    sv['name'] = 'TEST'
    input_var = read_line('Are you testing now? ')
    if 'no' not in input_var:
        sv['name'] += input_var
    else:
        sv.pop('name')

    net = rnn()
    out = get(2, rate=0.2, small=True)
    train = out['train']
    param['eval_data'] = out['val']
    param['marks'] = param['e_marks'] = out['marks']

    solver = Solver(net, train, sv, **param)
    solver.train()
    solver.predict()
def test_1D_nofilt():
    """Check that a 1-D net with unit weights passes its input through.

    All weights are forced to 1, except ``net.weights[1]`` which is replaced
    by a 1x1 zero matrix. A random input with values in [1, 2) is fed
    forward and 'Error' is printed for every returned state that differs
    from the input anywhere. Nothing is asserted.
    """
    ip_dim = op_dim = res_dim = 1
    nrlayers = 1
    seq_len = 10

    net = rnn(nrlayers, ip_dim, res_dim, op_dim)

    # Positive, negative and zero entries are each overwritten with 1.
    for layer_weights in net.weights:
        layer_weights[layer_weights > 0] = 1
        layer_weights[layer_weights < 0] = 1
        layer_weights[layer_weights == 0] = 1
    net.weights[1] = np.array([[0]])

    # Some random input, shifted up by one.
    ip = np.random.rand(ip_dim, seq_len)
    ip += 1

    for s in net.rnn_forward(ip):
        mismatches = np.sum((s != ip) > 0)
        if mismatches:
            print('Error')
def RNN(x, weights, biases, n_input):
    """Run a 2-layer GRU over ``x`` and project the last step's output.

    Args:
        x: rank-3 input tensor; the first two axes are swapped before the
            split into time steps (presumably (batch_size, n_steps, n_input)
            -- confirm against the caller).
        weights: projection matrix applied to the last output.
        biases: bias added after the projection.
        n_input: size of the last axis of ``x``.

    Returns:
        A pair ``(projection, outputs)`` where ``outputs`` is the list of
        per-step RNN outputs and ``projection`` is
        ``outputs[-1] @ weights + biases``.

    Uses the module-level ``n_steps`` and ``n_hidden``.
    """
    x = tf.transpose(x, [1, 0, 2])
    # Reshape to (n_steps * batch_size, n_input) ...
    x = tf.reshape(tensor=x, shape=[-1, n_input])
    # ... and split into a list of n_steps tensors of (batch_size, n_input).
    x = tf.split(value=x, num_or_size_splits=n_steps, axis=0)

    # One cell object per layer. Repeating a single instance
    # (`[cell] * 2`) makes both layers the same object, which newer
    # TensorFlow 1.x releases refuse when the layers get their own scopes.
    # Dropout on the outputs is there to limit overfitting.
    layers = [
        rnn_cell.DropoutWrapper(rnn_cell.GRUCell(n_hidden),
                                output_keep_prob=0.5)
        for _ in range(2)
    ]
    stacked_cell = rnn_cell.MultiRNNCell(layers)

    outputs, _ = rnn.rnn(cell=stacked_cell, inputs=x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases, outputs
def LSTM(self, name, _X, _istate):
    """Unroll a single LSTM cell step by step over ``_X``.

    ``_X`` has shape (batchsize, nsteps, len_vec) and ``_istate`` is the
    initial cell state; ``name`` is not used.

    Returns a triple: columns ``0:4096`` of the final step's output, columns
    ``4097:4101`` of the same output, and the cell state recorded right
    after the first step.
    """
    # Bring the step axis to the front: (nsteps, batchsize, len_vec).
    _X = tf.transpose(_X, [1, 0, 2])
    # Collapse to (nsteps * batchsize, len_vec) ...
    rows = self.nsteps * self.batchsize
    _X = tf.reshape(_X, [rows, self.len_vec])
    # ... and cut into a list of nsteps tensors of (batchsize, len_vec).
    _X = tf.split(0, self.nsteps, _X)

    lstm_cell = tf.nn.rnn_cell.LSTMCell(
        self.len_vec, self.len_vec, state_is_tuple=False)

    state = _istate
    for step in range(self.nsteps):
        is_first_step = step == 0
        pred, state = rnn.rnn(lstm_cell, [_X[step]], state, dtype=tf.float32)
        # Share the variables across all unrolled steps.
        tf.get_variable_scope().reuse_variables()
        if is_first_step:
            output_state = state

    # NOTE(review): the two slices leave out column 4096 -- confirm this
    # matches the layout of the input vector.
    batch_pred_feats = pred[0][:, 0:4096]
    batch_pred_coords = pred[0][:, 4097:4101]
    return batch_pred_feats, batch_pred_coords, output_state
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=tf.float32,
                                scope=None, initial_state_attention=False):
    """Embedding sequence-to-sequence model with attention.

    encoder_inputs are embedded by a newly created embedding (of shape
    [num_encoder_symbols x cell.input_size]) and encoded by an RNN whose
    per-step outputs are kept for attention. decoder_inputs are embedded by
    a second new embedding (of shape [num_decoder_symbols x cell.input_size])
    and fed to an attention decoder that starts from the last encoder state
    and attends to the encoder outputs.

    Args:
      encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
      decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: integer; number of symbols on the encoder side.
      num_decoder_symbols: integer; number of symbols on the decoder side.
      num_heads: number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights
        and biases; W has shape [cell.output_size x num_decoder_symbols] and
        B has shape [num_decoder_symbols]; if provided and
        feed_previous=True, each fed previous output is first multiplied by
        W and has B added.
      feed_previous: Boolean or scalar Boolean Tensor; if True, only the
        first of decoder_inputs is used (the "GO" symbol) and all other
        decoder inputs are taken from previous outputs (as in
        embedding_rnn_decoder). If False, decoder_inputs are used as given
        (the standard decoder case).
      dtype: dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".
      initial_state_attention: if False (default), initial attentions are
        zero. If True, the attentions are initialized from the initial state
        and the attention states.

    Returns:
      outputs: a list, as long as decoder_inputs, of 2D Tensors with shape
        [batch_size x num_decoder_symbols] containing the generated outputs.
      states: the state of each decoder cell at each time-step; a list of
        len(decoder_inputs) 2D Tensors of shape [batch_size x cell.state_size].
    """
    with tf.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder.
        encoder_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        encoder_outputs, encoder_states = rnn.rnn(
            encoder_cell, encoder_inputs, dtype=dtype)

        # Stack the encoder outputs along a new axis 1; attention reads
        # from this tensor.
        top_states = []
        for encoder_output in encoder_outputs:
            top_states.append(
                tf.reshape(encoder_output, [-1, 1, cell.output_size]))
        attention_states = tf.concat(1, top_states)

        # Decoder. Without an explicit projection the cell itself is wrapped
        # so that it emits num_decoder_symbols values.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = None

        def run_decoder(use_previous):
            return embedding_attention_decoder(
                decoder_inputs, encoder_states[-1], attention_states, cell,
                num_decoder_symbols, num_heads, output_size,
                output_projection, use_previous,
                initial_state_attention=initial_state_attention)

        if isinstance(feed_previous, bool):
            return run_decoder(feed_previous)

        # feed_previous is a Tensor: build both decoder graphs on the same
        # variables and let cond pick one at run time.
        outputs1, states1 = run_decoder(True)
        tf.get_variable_scope().reuse_variables()
        outputs2, states2 = run_decoder(False)

        outputs = control_flow_ops.cond(
            feed_previous, lambda: outputs1, lambda: outputs2)
        states = control_flow_ops.cond(
            feed_previous, lambda: states1, lambda: states2)
        return outputs, states
def __init__(self, config, load_from_arg_dic=None):
    """Build, bind and initialise the two-branch (senti / lm) LSTM network.

    The constructor assembles one MXNet symbol group and binds it:

    * a "senti" branch: a ``MultiRNNCell`` of ``LSTMCell`` layers unrolled
      over the ``t%d_senti`` inputs, followed by a fully connected layer
      with ``config.num_label`` outputs and a masked softmax;
    * an "lm" branch: a ``MultiRNNCell`` of ``LSTMCellWithExtraInput``
      layers unrolled over ``[t%d_data, senti_output_t]`` pairs, followed
      by a fully connected layer with ``config.vocab_size`` outputs and a
      masked softmax;
    * gradient-blocked copies of the last (c, h) state of every layer of
      both branches.

    After binding, every argument accepted by ``is_param_name`` is
    initialised uniformly with ``config.init_scale`` and paired with its
    gradient array in ``self._param_blocks``; arrays found in
    ``load_from_arg_dic`` are then copied over the matching arguments.

    Args:
        config: object providing ``batch_size``, ``seq_len``,
            ``max_grad_norm``, ``hidden_size``, ``ctx``, ``vocab_size``,
            ``num_label``, ``num_layers``, ``dropout``, ``init_scale``,
            ``learning_rate``, ``decay_when`` and ``lr_decay``.
        load_from_arg_dic: optional mapping from argument name to an
            array exposing ``copyto``; ``None`` means "load nothing".
    """
    # Identity test: `== None` would dispatch to a user-defined __eq__.
    if load_from_arg_dic is None:
        load_from_arg_dic = {}

    # ---- initialize some parameters ----
    self.batch_size = batch_size = config.batch_size
    self.seq_len = seq_len = config.seq_len
    self._max_grad_norm = config.max_grad_norm
    size = config.hidden_size
    ctx = config.ctx
    vocab_size = config.vocab_size
    num_label = config.num_label
    num_layers = config.num_layers
    self._logger = logger = Ylogger.Ylogger("test", "log/text.txt")

    # ---- build graph ----
    def stacked_cell(cell_cls):
        # The first layer is built with dropout 0; deeper layers use
        # config.dropout.
        return rnn_cell.MultiRNNCell([
            cell_cls(size, layeridx=layer,
                     dropout=0 if layer == 0 else config.dropout)
            for layer in range(num_layers)
        ])

    def layer_params(prefix, layer, suffixes):
        # One symbolic variable per suffix, named "<prefix>_l<layer>_<suffix>".
        return {
            suffix: mx.sym.Variable("%s_l%d_%s" % (prefix, layer, suffix))
            for suffix in suffixes
        }

    def blocked_last_states(prefix, last_states):
        # Wrap each layer's final (c, h) in BlockGrad so they are exposed
        # as outputs without contributing gradients.
        blocked_c = []
        blocked_h = []
        for i, (c, h) in enumerate(last_states):
            blocked_c.append(
                mx.sym.BlockGrad(c, name="%s_l%d_last_c" % (prefix, i)))
            blocked_h.append(
                mx.sym.BlockGrad(h, name="%s_l%d_last_h" % (prefix, i)))
        return blocked_c, blocked_h

    senti_cell = stacked_cell(rnn_cell.LSTMCell)
    lm_cell = stacked_cell(rnn_cell.LSTMCellWithExtraInput)

    senti_embed_weight = mx.sym.Variable("senti_embed_weight")
    senti_cls_weight = mx.sym.Variable("senti_cls_weight")
    senti_cls_bias = mx.sym.Variable("senti_cls_bias")
    lm_embed_weight = mx.sym.Variable("lm_embed_weight")
    lm_cls_weight = mx.sym.Variable("lm_cls_weight")
    lm_cls_bias = mx.sym.Variable("lm_cls_bias")

    # `range` instead of `xrange` keeps this working on Python 2 and 3.
    data = [mx.sym.Variable("t%d_data" % t) for t in range(seq_len)]
    senti = [mx.sym.Variable("t%d_senti" % t) for t in range(seq_len)]
    senti_mask = mx.sym.Variable("senti_mask")
    lm_mask = mx.sym.Variable("lm_mask")

    gate_suffixes = ("i2h_weight", "i2h_bias", "h2h_weight", "h2h_bias")
    extra_suffixes = ("s2h_weight", "s2h_bias")
    senti_params = [senti_embed_weight]
    senti_params.extend(
        layer_params("senti", layer, gate_suffixes)
        for layer in range(num_layers))
    lm_params = [lm_embed_weight]
    lm_params.extend(
        layer_params("lm", layer, gate_suffixes + extra_suffixes)
        for layer in range(num_layers))

    senti_cell = rnn_cell.EmbeddingWrapper(senti_cell, size)
    lm_cell = rnn_cell.EmbeddingWrapperWithExtraInput(lm_cell, size)

    # Both branches are unrolled from the same initial-state variables.
    initial_state = [
        (mx.sym.Variable("%d_init_c" % i), mx.sym.Variable("%d_init_h" % i))
        for i in range(num_layers)
    ]

    senti_outputs, senti_states = rnn.rnn(
        senti_cell, senti,
        {"cell": senti_params, "initial_state": initial_state})
    # A real list (not a lazy Python 3 `map` object) of [data_t, senti_out_t].
    lm_inputs = [list(pair) for pair in zip(data, senti_outputs)]
    lm_outputs, lm_states = rnn.rnn(
        lm_cell, lm_inputs,
        {"cell": lm_params, "initial_state": initial_state})

    senti_ct = mx.sym.Concat(*senti_outputs, dim=0)
    lm_ct = mx.sym.Concat(*lm_outputs, dim=0)
    if config.dropout > 0.:
        senti_ct = mx.sym.Dropout(data=senti_ct, p=config.dropout)
        lm_ct = mx.sym.Dropout(data=lm_ct, p=config.dropout)

    senti_fc = mx.sym.FullyConnected(data=senti_ct,
                                     weight=senti_cls_weight,
                                     bias=senti_cls_bias,
                                     num_hidden=num_label)
    lm_fc = mx.sym.FullyConnected(data=lm_ct,
                                  weight=lm_cls_weight,
                                  bias=lm_cls_bias,
                                  num_hidden=vocab_size)

    senti_label = mx.sym.Variable("senti_label")
    senti_sm = mx.sym.SoftmaxMaskOutput(data=senti_fc, label=senti_label,
                                        mask=senti_mask, name="senti_sm")
    lm_label = mx.sym.Variable("lm_label")
    lm_sm = mx.sym.SoftmaxMaskOutput(data=lm_fc, label=lm_label,
                                     mask=lm_mask, name="lm_sm")

    senti_unpack_c, senti_unpack_h = blocked_last_states(
        "senti", senti_states[-1])
    lm_unpack_c, lm_unpack_h = blocked_last_states("lm", lm_states[-1])

    rnn_sym = mx.sym.Group([senti_sm] + [lm_sm] +
                           senti_unpack_c + senti_unpack_h +
                           lm_unpack_c + lm_unpack_h)

    # ---- produce interfaces for outside ----
    logger(rnn_sym.list_arguments())
    logger(rnn_sym.list_outputs())

    arg_names = rnn_sym.list_arguments()
    input_shapes = {}
    for name in arg_names:
        if name.endswith(("init_c", "init_h")):
            input_shapes[name] = (batch_size, size)
        elif name.endswith("data"):
            input_shapes[name] = (batch_size, vocab_size)
        elif name.endswith("senti"):
            # NOTE(review): the width 2 is hard-coded; confirm whether it
            # is meant to follow config.num_label.
            input_shapes[name] = (batch_size, 2)

    # "add" is passed as simple_bind's second positional argument
    # (grad_req in MXNet's signature).
    self._rnn_exec = rnn_exec = rnn_sym.simple_bind(ctx, "add",
                                                    **input_shapes)
    arg_dict = dict(zip(arg_names, rnn_exec.arg_arrays))
    self.arg_dict = arg_dict

    self._param_blocks = []
    initializer = mx.initializer.Uniform(config.init_scale)
    for i, name in enumerate(arg_names):
        if is_param_name(name):
            initializer(name, arg_dict[name])
            self._param_blocks.append((arg_dict[name],
                                       rnn_exec.grad_arrays[i]))

    # Supplied arrays override the random initialisation above.
    for name in arg_names:
        if name in load_from_arg_dic:
            logger("load parameter" + name)
            load_from_arg_dic[name].copyto(arg_dict[name])

    self._seq_data = [arg_dict["t%d_data" % t] for t in range(seq_len)]
    self._seq_senti = [arg_dict["t%d_senti" % t] for t in range(seq_len)]
    self._senti_mask = arg_dict["senti_mask"]
    self._lm_mask = arg_dict["lm_mask"]
    self._init_states = [
        (arg_dict["%d_init_c" % layer], arg_dict["%d_init_h" % layer])
        for layer in range(num_layers)
    ]

    out_dict = dict(zip(rnn_sym.list_outputs(), rnn_exec.outputs))
    self._senti_last_states = [
        (out_dict["senti_l%d_last_c_output" % layer],
         out_dict["senti_l%d_last_h_output" % layer])
        for layer in range(num_layers)
    ]
    self._lm_last_states = [
        (out_dict["lm_l%d_last_c_output" % layer],
         out_dict["lm_l%d_last_h_output" % layer])
        for layer in range(num_layers)
    ]

    self._senti_labels = arg_dict["senti_label"]
    self._lm_labels = arg_dict["lm_label"]
    self._senti_outputs = out_dict["senti_sm_output"]
    self._lm_outputs = out_dict["lm_sm_output"]

    self._opt = mx.optimizer.create("sgd", wd=0, momentum=0,
                                    learning_rate=config.learning_rate)
    self._last_loss = 1e10
    self._updater = mx.optimizer.get_updater(self._opt)
    self._decay_when = config.decay_when
    self._lr_decay = config.lr_decay
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=tf.float32,
                                scope=None):
    """Attention-based embedding sequence-to-sequence model.

    The encoder side embeds encoder_inputs with a freshly created embedding
    (shape [num_encoder_symbols x cell.input_size]) and runs an RNN over
    them; the RNN output of every step is retained so the decoder can attend
    to it. The decoder side embeds decoder_inputs with a second fresh
    embedding (shape [num_decoder_symbols x cell.input_size]) and runs an
    attention decoder that starts from the final encoder state.

    Args:
      encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      cell: rnn_cell.RNNCell defining the cell function and size.
      num_encoder_symbols: integer; number of symbols on the encoder side.
      num_decoder_symbols: integer; number of symbols on the decoder side.
      num_heads: number of attention heads that read from attention_states.
      output_projection: None or a pair (W, B) of output projection weights
        and biases; W has shape [cell.output_size x num_decoder_symbols] and
        B has shape [num_decoder_symbols]. When given together with
        feed_previous=True, every fed-back output is first multiplied by W
        and has B added.
      feed_previous: Boolean or scalar Boolean Tensor. If True, only the
        first decoder input (the "GO" symbol) is used and every later input
        comes from the previous output (as in embedding_rnn_decoder). If
        False, decoder_inputs are used as given (the standard decoder case).
      dtype: The dtype of the initial RNN state (default: tf.float32).
      scope: VariableScope for the created subgraph; defaults to
        "embedding_attention_seq2seq".

    Returns:
      outputs: A list of the same length as decoder_inputs of 2D Tensors
        with shape [batch_size x num_decoder_symbols] containing the
        generated outputs.
      states: The state of each decoder cell in each time-step. This is a
        list with length len(decoder_inputs) -- one item for each
        time-step. Each item is a 2D Tensor of shape
        [batch_size x cell.state_size].
    """
    with tf.variable_scope(scope or "embedding_attention_seq2seq"):
        # Encoder: embed the input symbols and run the RNN over them.
        embedding_cell = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
        enc_outputs, enc_states = rnn.rnn(
            embedding_cell, encoder_inputs, dtype=dtype)

        # Stack the per-step encoder outputs along axis 1 for attention.
        # cell.output_size is read here, before `cell` may be re-wrapped.
        per_step = [tf.reshape(step_out, [-1, 1, cell.output_size])
                    for step_out in enc_outputs]
        attention_states = tf.concat(1, per_step)

        # Decoder: without an explicit projection, wrap the cell so that it
        # emits num_decoder_symbols outputs itself.
        if output_projection is None:
            cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
            output_size = num_decoder_symbols
        else:
            output_size = None

        def run_decoder(feed):
            # Every decoder build differs only in the feed_previous flag.
            return embedding_attention_decoder(
                decoder_inputs, enc_states[-1], attention_states, cell,
                num_decoder_symbols, num_heads, output_size,
                output_projection, feed)

        if isinstance(feed_previous, bool):
            return run_decoder(feed_previous)

        # feed_previous is a Tensor: build both decoder graphs (the second
        # one reusing the first one's variables) and select with cond.
        fed_outputs, fed_states = run_decoder(True)
        tf.get_variable_scope().reuse_variables()
        given_outputs, given_states = run_decoder(False)

        outputs = tf.control_flow_ops.cond(
            feed_previous, lambda: fed_outputs, lambda: given_outputs)
        states = tf.control_flow_ops.cond(
            feed_previous, lambda: fed_states, lambda: given_states)
        return outputs, states
# Training-setup fragment: load the pickled (X, Y) pair, label the tail of
# the vocabulary, split off a validation set, and assemble and compile the
# model.
# NOTE(review): `fp` must already be an open file object at this point; the
# statement that opens it is not part of this fragment.
X, Y = pickle.load(fp)

# Give the last `nb_unknown_words` vocabulary indices the display names
# '<0>', '<1>', ... counting down from vocab_size - 1.
for i in range(nb_unknown_words):
    idx2word[vocab_size - 1 - i] = '<%d>' % i

# Lowest index of that tail range.
oov0 = vocab_size - nb_unknown_words

# Hold out `nb_val_samples` examples; `seed` makes the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=nb_val_samples, random_state=seed)

# Display names for the `empty` and `eos` indices.
idx2word[empty] = '_'
idx2word[eos] = '~'

# Base network from the project's `rnn` builder, then a per-time-step dense
# layer of width vocab_size followed by a softmax.
model = rnn(vocab_size, embedding_size, maxlen, embedding, maxlend, maxlenh)
model.add( TimeDistributed( Dense(vocab_size, kernel_regularizer=regularizer, bias_regularizer=regularizer, name='timedistributed_1')))
model.add(Activation('softmax', name='activation_1'))
model.compile(loss='categorical_crossentropy', optimizer=optimizer)

# Overwrite the compiled optimizer's learning rate with LR.
K.set_value(model.optimizer.lr, np.float32(LR))
model.summary()

# Disabled switch: as written this branch never runs, so no weights are
# loaded from 'model.hdf5'.
if False:
    model.load_weights('model.hdf5')
def init_rnn(self, L1, L2):
    """Create the network and keep it on ``self.rnn_model``.

    The model is constructed as ``rnn(70, self.length, 4, L1, L2)``; ``L1``
    and ``L2`` are forwarded unchanged as the last two positional arguments.
    """
    # NOTE(review): the meaning of the literals 70 and 4 is not visible
    # here; check the `rnn` constructor before changing them.
    network = rnn(70, self.length, 4, L1, L2)
    self.rnn_model = network
def main():
    """Instantiate ``rnn.rnn`` with no arguments and call its ``request()``."""
    rnn.rnn().request()
########## Normalize time-series values ############# mean = np.mean(time_series_data) stdev = np.sqrt(np.var(time_series_data)) time_series_data_normalized = ((time_series_data-mean)/stdev)[:1000] ################## Data preparation ############################ X_column_list = [0] y_column_list = [0] number_of_delays = 8 test_fraction = 0.5 X_train,y_train,X_test,y_test = data_prep.prepare(time_series_data_normalized,X_column_list,y_column_list,number_of_delays,test_fraction) ########### Model RNN using rnn class ############### rnn1 = rnn.rnn() rnn1.initialize(X_train, y_train,hidden_units_1=3,hidden_units_2=3,activation_1='tanh',activation_2='tanh') rnn1.get_parameters() initial_theta=np.asarray(list(rnn1.parameters.values())) (loss_vector,loss_val_vector)=rnn1.train(rnn1.X,rnn1.y,initial_theta,train_validation_split=0.7,epochs=10000,restore_best_theta=True) y_pred=rnn1.predict(X_test) rnn1.print_dashed_line() print("Test error:") print(metrics.mean_squared_error(y_test[:,0],y_pred[:,0])) rnn1.print_dashed_line() ############ Plotting loss function, attractor and time-series predictions ############### print("Plotting training and validation losses...") plt.plot(loss_vector,label="Training")
def init_rnn_model(self, num_hidden, L1, L2):
    """Create the network and keep it on ``self.rnn_model``.

    ``len(self.dic)`` is passed as both the second and the third positional
    argument of ``rnn``; ``num_hidden``, ``L1`` and ``L2`` are forwarded
    unchanged.
    """
    dic_size = len(self.dic)
    network = rnn(num_hidden, dic_size, dic_size, L1, L2)
    self.rnn_model = network