예제 #1
0
 def _encoder(self, in_features, encoder_n_hidden, encoder_pre_rnn_layers,
              encoder_post_rnn_layers, forget_gate_bias, norm, rnn_type,
              encoder_stack_time_factor, dropout):
     """Assemble the encoder sub-modules into a ``torch.nn.ModuleDict``.

     The returned dictionary holds, in this order, a "pre_rnn" stack, a
     "stack_time" layer and a "post_rnn" stack.  The post stack is built
     with ``input_size = encoder_stack_time_factor * encoder_n_hidden`` and
     ``norm_first_rnn=True``.
     """
     # Keyword arguments that both recurrent stacks receive unchanged.
     shared = dict(
         rnn=rnn_type,
         hidden_size=encoder_n_hidden,
         norm=norm,
         forget_gate_bias=forget_gate_bias,
         dropout=dropout,
     )

     # Modules are created in the same order in which they are registered.
     pre_rnn = rnn(input_size=in_features,
                   num_layers=encoder_pre_rnn_layers,
                   **shared)
     stack_time = StackTime(factor=encoder_stack_time_factor)
     post_rnn = rnn(input_size=encoder_stack_time_factor * encoder_n_hidden,
                    num_layers=encoder_post_rnn_layers,
                    norm_first_rnn=True,
                    **shared)

     return torch.nn.ModuleDict({
         "pre_rnn": pre_rnn,
         "stack_time": stack_time,
         "post_rnn": post_rnn,
     })
예제 #2
0
 def _encode(self):
     """Encode the embedded inputs with bi-LSTM -> self-attention -> bi-LSTM.

     Dropout (keep probability taken from the 'dropout_keep_prob'
     placeholder) is applied after every stage.  On return the method has
     set ``self.attn_outputs``, ``self.attn_weights`` and
     ``self.encoded_sent`` (the output of the second recurrent pass).
     """
     lengths = self.placeholders['input_length']
     keep_prob = self.placeholders['dropout_keep_prob']

     # First recurrent pass over the embeddings.
     with tf.variable_scope('rnn_1'):
         first_pass, _ = rnn('bi-lstm',
                             self.embedded_inputs,
                             lengths,
                             hidden_size=self.hidden_size,
                             layer_num=1,
                             concat=True)
         first_pass = tf.nn.dropout(first_pass, keep_prob)

     # Self-attention over the first pass, concatenated back onto it.
     attended, self.attn_weights = self_attention(first_pass, lengths,
                                                  self.window_size)
     self.attn_outputs = tf.nn.dropout(attended, keep_prob)
     merged = tf.concat([first_pass, self.attn_outputs], -1)

     # Second recurrent pass over the attention-augmented representation.
     with tf.variable_scope('rnn_2'):
         second_pass, _ = rnn('bi-lstm',
                              merged,
                              lengths,
                              hidden_size=self.hidden_size,
                              layer_num=1,
                              concat=True)
         self.encoded_sent = tf.nn.dropout(second_pass, keep_prob)
예제 #3
0
 def __init__(self, in_features, encoder_n_hidden,
              encoder_pre_rnn_layers, encoder_post_rnn_layers,
              forget_gate_bias, norm, rnn_type, encoder_stack_time_factor,
              dropout):
     """Create the ``pre_rnn``, ``stack_time`` and ``post_rnn`` sub-modules.

     ``post_rnn`` is built with an input size of
     ``encoder_stack_time_factor * encoder_n_hidden`` and with
     ``norm_first_rnn=True``; both recurrent stacks share the remaining
     settings.
     """
     super().__init__()

     # Keyword arguments common to both recurrent stacks.
     shared = {
         "rnn": rnn_type,
         "hidden_size": encoder_n_hidden,
         "norm": norm,
         "forget_gate_bias": forget_gate_bias,
         "dropout": dropout,
     }

     self.pre_rnn = rnn(input_size=in_features,
                        num_layers=encoder_pre_rnn_layers,
                        **shared)
     self.stack_time = StackTime(factor=encoder_stack_time_factor)

     post_rnn_width = encoder_stack_time_factor * encoder_n_hidden
     self.post_rnn = rnn(input_size=post_rnn_width,
                         num_layers=encoder_post_rnn_layers,
                         norm_first_rnn=True,
                         **shared)
예제 #4
0
def las(config):
    """Build the 'las' model dictionary together with its training ops.

    Layers are wired in sequence (three 'blstm' layers interleaved with two
    'py' layers, then 's2s', 'map' and 'ce').  The function then adds the
    'loss', 'step', 'lrate' and 'optim' entries, reading the learning-rate
    schedule and the optimizer name from the 'global' section of *config*.
    """
    # (builder, layer name[, name of the layer feeding it]) in wiring order.
    stages = (
        (rnn.rnn, 'blstm1'),
        (py.py, 'py1', 'blstm1'),
        (rnn.rnn, 'blstm2', 'py1'),
        (py.py, 'py2', 'blstm2'),
        (rnn.rnn, 'blstm3', 'py2'),
        (s2s.s2s, 's2s', 'blstm3'),
        (fnn.fnn, 'map', 's2s'),
        (ce.ce, 'ce', 'map'),
    )

    model = dict()
    for stage in stages:
        build, names = stage[0], stage[1:]
        model = build(model, config, *names)

    model['loss'] = model['ce_loss']
    model['step'] = tf.Variable(0, trainable=False, name='step')

    initial_rate = config.getfloat('global', 'lrate')
    decay_steps = config.getint('global', 'dstep')
    decay_rate = config.getfloat('global', 'drate')
    model['lrate'] = tf.train.exponential_decay(initial_rate,
                                                model['step'],
                                                decay_steps,
                                                decay_rate,
                                                staircase=False,
                                                name='lrate')

    # The optimizer class is looked up by name in tf.train.
    optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
    optimizer = optimizer_cls(model['lrate'])
    model['optim'] = optimizer.minimize(model['loss'],
                                        global_step=model['step'],
                                        name='optim')

    return model
예제 #5
0
 def __init__(self):
     """Build the ``rnn`` model with fixed settings and restore it.

     Also sets the ``TF_CPP_MIN_LOG_LEVEL`` environment variable to '2'
     before the model is created.
     """
     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

     settings = {
         # directories
         'log_dir': 'logs',
         'checkpoint_dir': 'checkpoints',
         'prediction_dir': 'predictions',
         # per-stage schedules (three entries each)
         'learning_rates': [.0001, .00005, .00002],
         'batch_sizes': [32, 64, 64],
         'patiences': [1500, 1000, 500],
         'beta1_decays': [.9, .9, .9],
         # optimisation
         'validation_batch_size': 32,
         'optimizer': 'rms',
         'num_training_steps': 100000,
         'warm_start_init_step': 17900,
         'regularization_constant': 0.0,
         'keep_prob': 1.0,
         'enable_parameter_averaging': False,
         'min_steps_to_checkpoint': 2000,
         'log_interval': 20,
         'logging_level': logging.CRITICAL,
         'grad_clip': 10,
         # network size
         'lstm_size': 400,
         'output_mixture_components': 20,
         'attention_mixture_components': 10,
     }
     self.nn = rnn(**settings)
     self.nn.restore()
예제 #6
0
def tied_rnn_seq2seq(encoder_inputs, decoder_inputs, cell,
                     loop_function=None, dtype=tf.float32, scope=None):
  """Sequence-to-sequence RNN whose encoder and decoder share parameters.

  The encoder RNN is run over encoder_inputs; the decoder is then started
  from the last encoder state and run over decoder_inputs. Both use the same
  cell and the same variables.

  Args:
    encoder_inputs: list of 2D Tensors [batch_size x cell.input_size].
    decoder_inputs: list of 2D Tensors [batch_size x cell.input_size].
    cell: rnn_cell.RNNCell giving the cell function and size.
    loop_function: when not None, it is applied to the i-th output to produce
      the (i+1)-th input, and decoder_inputs is ignored apart from its first
      element (the "GO" symbol); see rnn_decoder for details.
    dtype: dtype of the initial RNN cell state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "tied_rnn_seq2seq".

  Returns:
    outputs: list, as long as decoder_inputs, of 2D Tensors shaped
      [batch_size x cell.output_size] holding the generated outputs.
    states: the decoder cell state at every time-step, i.e. a list of
      len(decoder_inputs) 2D Tensors shaped [batch_size x cell.state_size].
  """
  inner_scope = scope or "tied_rnn_seq2seq"
  with tf.variable_scope("combined_tied_rnn_seq2seq"):
    _, encoder_states = rnn.rnn(
        cell, encoder_inputs, dtype=dtype, scope=inner_scope)
    # Switch the enclosing scope to reuse mode so that the decoder, built
    # under the same inner scope, picks up the encoder's variables.
    tf.get_variable_scope().reuse_variables()
    last_encoder_state = encoder_states[-1]
    return rnn_decoder(decoder_inputs, last_encoder_state, cell,
                       loop_function=loop_function, scope=inner_scope)
예제 #7
0
def basic_rnn_seq2seq(
    encoder_inputs, decoder_inputs, cell, dtype=tf.float32, scope=None):
  """Plain RNN sequence-to-sequence model.

  The encoder RNN is run over encoder_inputs; the decoder is then started
  from the last encoder state and run over decoder_inputs. Encoder and
  decoder use the same cell type but do not share parameters.

  Args:
    encoder_inputs: list of 2D Tensors [batch_size x cell.input_size].
    decoder_inputs: list of 2D Tensors [batch_size x cell.input_size].
    cell: rnn_cell.RNNCell giving the cell function and size.
    dtype: dtype of the initial RNN cell state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "basic_rnn_seq2seq".

  Returns:
    outputs: list, as long as decoder_inputs, of 2D Tensors shaped
      [batch_size x cell.output_size] holding the generated outputs.
    states: the decoder cell state at every time-step, i.e. a list of
      len(decoder_inputs) 2D Tensors shaped [batch_size x cell.state_size].
  """
  outer_scope = scope or "basic_rnn_seq2seq"
  with tf.variable_scope(outer_scope):
    _, encoder_states = rnn.rnn(cell, encoder_inputs, dtype=dtype)
    last_encoder_state = encoder_states[-1]
    return rnn_decoder(decoder_inputs, last_encoder_state, cell)
 def __init__(self, checkpoint_dir=None):
     """Build the ``rnn`` model with fixed settings and restore it.

     Also sets the ``TF_CPP_MIN_LOG_LEVEL`` environment variable to '2'
     before the model is created.

     Args:
         checkpoint_dir: directory handed to ``rnn`` as ``checkpoint_dir``.
             When omitted, the path that used to be hard-coded here is
             used, so existing zero-argument callers behave as before.
     """
     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

     if checkpoint_dir is None:
         # Legacy default: an absolute path on the original author's
         # machine.  Pass `checkpoint_dir` explicitly on any other system.
         checkpoint_dir = ('/home/martin/work/thesis/repo/results/'
                           'handwriting-synthesis/'
                           '1564954698_ordertests_mean')

     self.nn = rnn(
         log_dir='logs',
         checkpoint_dir=checkpoint_dir,
         prediction_dir=None,
         learning_rates=[.0001, .00005, .00002],
         batch_sizes=[32, 64, 64],
         patiences=[1500, 1000, 500],
         beta1_decays=[.9, .9, .9],
         validation_batch_size=32,
         optimizer='rms',
         num_training_steps=100000,
         warm_start_init_step=17900,
         regularization_constant=0.0,
         keep_prob=1.0,
         enable_parameter_averaging=False,
         min_steps_to_checkpoint=2000,
         log_interval=20,
         logging_level=logging.CRITICAL,
         grad_clip=10,
         lstm_size=400,
         output_mixture_components=20,
         attention_mixture_components=10
     )
     self.nn.restore()
 def __init__(self):
     """Build the ``rnn`` model with fixed settings and restore it.

     The checkpoint directory is resolved relative to this source file:
     ``<dir of this file>/../checkpoints/graves/iam_online``.  Also sets
     the ``TF_CPP_MIN_LOG_LEVEL`` environment variable to '2'.
     """
     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

     here = os.path.dirname(os.path.realpath(__file__))
     checkpoint_dir = os.path.join(here, '..', 'checkpoints', "graves",
                                   "iam_online")

     settings = {
         'log_dir': 'logs',
         'checkpoint_dir': checkpoint_dir,
         'prediction_dir': None,
         'learning_rates': [.0001, .00005, .00002],
         'batch_sizes': [32, 64, 64],
         'patiences': [1500, 1000, 500],
         'beta1_decays': [.9, .9, .9],
         'validation_batch_size': 32,
         'optimizer': 'rms',
         'num_training_steps': 100000,
         'warm_start_init_step': 17900,
         'regularization_constant': 0.0,
         'keep_prob': 1.0,
         'enable_parameter_averaging': False,
         'min_steps_to_checkpoint': 2000,
         'log_interval': 20,
         'logging_level': logging.CRITICAL,
         'grad_clip': 10,
         'lstm_size': 400,
         'output_mixture_components': 20,
         'attention_mixture_components': 10,
     }
     self.nn = rnn(**settings)
     self.nn.restore()
예제 #10
0
파일: lstm.py 프로젝트: zhaoaite/ASTCapsNet
def RNN(x, weights, biases):
    """Run a 2-layer GRU over `x` and project the last step's output.

    Args:
        x: input tensor; per the reshaping below it is expected as
            (batch_size, n_steps, n_input).
        weights: mapping whose 'out' entry is the output projection matrix.
        biases: mapping whose 'out' entry is the output bias.

    Returns:
        A pair ``(logits, last_output)`` where ``last_output`` is the RNN
        output at the final time step and
        ``logits = last_output @ weights['out'] + biases['out']``.

    Relies on the module-level ``n_input``, ``n_steps`` and ``n_hidden``.
    """
    # The static `rnn` function wants a list of `n_steps` tensors of shape
    # (batch_size, n_input), so go time-major, flatten, then split.
    x = tf.transpose(x, [1, 0, 2])
    x = tf.reshape(tensor=x, shape=[-1, n_input])
    x = tf.split(value=x, num_or_size_splits=n_steps, axis=0)

    def _make_layer():
        """Return one GRU layer wrapped with output dropout."""
        cell = rnn_cell.GRUCell(n_hidden)
        # NOTE(review): the keep probability is fixed at 0.5, so dropout is
        # presumably also active at inference time - confirm intended.
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=0.5)

    # Build a *separate* cell object per layer.  `[cell] * 2` would put the
    # same object into both layers, which newer TensorFlow 1.x releases
    # either reject or treat as weight sharing between the layers.
    stacked_cell = rnn_cell.MultiRNNCell([_make_layer() for _ in range(2)])

    outputs, _ = rnn.rnn(cell=stacked_cell, inputs=x, dtype=tf.float32)

    # Linear activation on the last output of the RNN inner loop.
    last_output = outputs[-1]
    return tf.matmul(last_output, weights['out']) + biases['out'], last_output
예제 #11
0
def run_rnn():
    """Create an RNN model for the digit images, then train and test it.

    Images are assumed to be square: every image row is fed as one time
    step, so the number of inputs and the number of time steps both equal
    the image side length.
    """
    rnn_obj = rnn(data_path)

    # Load the input data.
    input_data = nn_utilities_obj.prepare_digits_image_inputs()

    # Override the default learning rate.
    rnn_obj.learning_rate_var = 0.0005

    # Side length of the (assumed square) flattened image.
    side = int(np.sqrt(input_data["x_train"].shape[1]))

    # Network parameters.
    num_input = side  # features per time step (one image row)
    timesteps = side  # number of time steps (image rows)
    num_hidden = 128  # hidden layer size
    num_classes = 10  # digits 0-9

    optimizer, cost, accuracy, rnn_model = rnn_obj.create_model(
        num_input, timesteps, num_hidden, num_classes)

    # Reshape the flat images into (samples, timesteps, num_input).
    for split in ("x_train", "x_validation"):
        samples = input_data[split]
        input_data[split] = np.reshape(
            samples, [samples.shape[0], timesteps, num_input])

    # Train the model and test its predictions.
    run_nn(rnn_obj, input_data, optimizer, cost, accuracy, rnn_model,
           "rnn/" + input_data["name"])
예제 #12
0
 def __init__(self):
     """Build the ``rnn`` model with fixed settings and restore it.

     Also sets the ``TF_CPP_MIN_LOG_LEVEL`` environment variable to '2'
     before the model is created.
     """
     os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

     settings = {
         # directories
         'log_dir': 'logs',
         'checkpoint_dir': 'checkpoints',
         'prediction_dir': 'predictions',
         # per-stage schedules (three entries each)
         'learning_rates': [.0001, .00005, .00002],
         'batch_sizes': [32, 64, 64],
         'patiences': [1500, 1000, 500],
         'beta1_decays': [.9, .9, .9],
         # optimisation
         'validation_batch_size': 32,
         'optimizer': 'rms',
         'num_training_steps': 100000,
         'warm_start_init_step': 17900,
         'regularization_constant': 0.0,
         'keep_prob': 1.0,
         'enable_parameter_averaging': False,
         'min_steps_to_checkpoint': 2000,
         'log_interval': 20,
         'logging_level': logging.CRITICAL,
         'grad_clip': 10,
         # network size
         'lstm_size': 400,
         'output_mixture_components': 20,
         'attention_mixture_components': 10,
     }
     self.nn = rnn(**settings)
     self.nn.restore()
예제 #13
0
def dsd(config):
    """Build the 'dsd' model dictionary together with its training ops.

    Layers are wired in the order listed in ``stages``.  The function then
    adds the 'loss' (taken from 'ctc_loss'), 'step', 'lrate' and 'optim'
    entries, reading the learning-rate schedule and the optimizer name from
    the 'global' section of *config*.
    """
    # (builder, layer name[, names of the layers feeding it]) in wiring order.
    # NOTE(review): the name 'ctc' is used twice (fed by 'rnn', then by
    # 'fnns') - confirm the second registration is intended.
    stages = (
        (cnn.cnn, 'cnn'),
        (rnn.rnn, 'rnn', 'cnn'),
        (ctc.ctc, 'ctc', 'rnn'),
        (fnn.fnn, 'fnns', 'rnn'),
        (ctc.ctc, 'ctc', 'fnns'),
        (fnn.fnn, 'fnnd', 'rnn'),
        (ces.ces, 'ces', 'fnnd'),
        (dia.dia, 'dia', 'fnnd', 'rnn'),
        (ced.ced, 'ced', 'dia'),
    )

    model = dict()
    for stage in stages:
        build, names = stage[0], stage[1:]
        model = build(model, config, *names)

    model['loss'] = model['ctc_loss']
    model['step'] = tf.Variable(0, trainable=False, name='step')

    initial_rate = config.getfloat('global', 'lrate')
    decay_steps = config.getint('global', 'dstep')
    decay_rate = config.getfloat('global', 'drate')
    model['lrate'] = tf.train.exponential_decay(initial_rate,
                                                model['step'],
                                                decay_steps,
                                                decay_rate,
                                                staircase=False,
                                                name='lrate')

    # The optimizer class is looked up by name in tf.train.
    optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
    optimizer = optimizer_cls(model['lrate'])
    model['optim'] = optimizer.minimize(model['loss'],
                                        global_step=model['step'],
                                        name='optim')

    return model
예제 #14
0
def main():
    """Train and evaluate the index-based ('vg') RNN language model.

    Steps: load the data, prune rare words, apply the cutoff, convert the
    data to vocabulary indices, wrap inputs/targets in ``VG``, train an
    ``rnn.rnn`` model and print its accuracy on the test split.
    """
    prune_occurance_lt = 15
    cutoff = 100  # other values tried: numpy.inf, 1000
    hidden_size = 100
    num_epochs = 5

    print("loading data...", end="", flush=True)
    train, dev, test = load_data()
    print("done.")

    print("pruning words with occurances < %s..." % prune_occurance_lt,
          end="",
          flush=True)
    vocab, train, dev, test = prune_words(
        train, dev, test, prune_occurances_lt=prune_occurance_lt)
    vocab_size = len(vocab)
    train, dev, test = cutoff_data(train, dev, test, cutoff=cutoff)
    print("done. vocab size: %s" % len(vocab))

    print("vectorizing data with cutoff: %s..." % cutoff, end="", flush=True)
    train_is, dev_is, test_is = convert_all_data_to_vocab_indices(
        train, dev, test, vocab)
    print("done.")

    print("Checking data formats...")

    def wrap(split):
        """Wrap the (inputs, targets) pair of one split in VG objects."""
        return VG(split[0], vocab_size), VG(split[1], vocab_size)

    X_train_is, Y_train_is = wrap(train_is)
    X_dev_is, Y_dev_is = wrap(dev_is)
    X_test_is, Y_test_is = wrap(test_is)
    print("done.")

    print("training vg model for %s epoch(s)..." % num_epochs)
    lm_vg = rnn.rnn(len(vocab), len(vocab), hidden_size=hidden_size, seed=10)
    lm_vg.train(X_train_is, Y_train_is, verbose=2, epochs=num_epochs)

    acc_vg = test_model(lm_vg, X_test_is, Y_test_is)
    print("vg acc: {0:.3f}".format(acc_vg))
예제 #15
0
def getModel(params):
    """Return the model object selected by ``params["model_name"]``.

    The model's source directory (``global_params.py_models_path`` formatted
    with the model name) is put at the front of ``sys.path`` first, whether
    or not the name is recognised.

    Raises:
        ValueError: if the model name is anything other than "rnn".
    """
    model_name = params["model_name"]
    sys.path.insert(0, global_params.py_models_path.format(model_name))

    if model_name != "rnn":
        raise ValueError("model not found.")

    # Imported lazily: the module only becomes importable after the
    # sys.path insertion above.
    import rnn as model
    return model.rnn(params)
예제 #16
0
파일: las.py 프로젝트: aaiijmrtt/DEEPSPEECH
def las(config):
	"""Build the 'las' model dictionary together with its training ops.

	Layers are wired in sequence (three 'blstm' layers interleaved with two
	'py' layers, then 's2s', 'map' and 'ce').  The function then adds the
	'loss', 'step', 'lrate' and 'optim' entries, reading the learning-rate
	schedule and the optimizer name from the 'global' section of *config*.
	"""
	# (builder, layer name[, name of the layer feeding it]) in wiring order.
	stages = (
		(rnn.rnn, 'blstm1'),
		(py.py, 'py1', 'blstm1'),
		(rnn.rnn, 'blstm2', 'py1'),
		(py.py, 'py2', 'blstm2'),
		(rnn.rnn, 'blstm3', 'py2'),
		(s2s.s2s, 's2s', 'blstm3'),
		(fnn.fnn, 'map', 's2s'),
		(ce.ce, 'ce', 'map'),
	)

	model = dict()
	for stage in stages:
		build, names = stage[0], stage[1:]
		model = build(model, config, *names)

	model['loss'] = model['ce_loss']
	model['step'] = tf.Variable(0, trainable = False, name = 'step')

	initial_rate = config.getfloat('global', 'lrate')
	decay_steps = config.getint('global', 'dstep')
	decay_rate = config.getfloat('global', 'drate')
	model['lrate'] = tf.train.exponential_decay(initial_rate, model['step'], decay_steps, decay_rate, staircase = False, name = 'lrate')

	# The optimizer class is looked up by name in tf.train.
	optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
	model['optim'] = optimizer_cls(model['lrate']).minimize(model['loss'], global_step = model['step'], name = 'optim')

	return model
def predict(param=PARAMS, sv=SOLVE, small=False):
    """Load a saved network (the ``Slow200`` checkpoint) and run prediction.

    Note that *sv* and *param* are modified in place; with the default
    arguments these are the module-level ``SOLVE`` and ``PARAMS`` objects.
    *small* is accepted but not used.
    """
    # Solver settings: load the checkpoint identified by Slow200.
    sv['load'] = True
    prefix, epoch = Slow200
    sv['load_perfix'] = prefix
    sv['load_epoch'] = epoch
    sv['name'] = 'Pred'

    network = rnn.rnn()

    dataset = get(1, rate=0.1)
    training_data = dataset['train']
    param['eval_data'] = dataset['val']
    marks = dataset['marks']
    param['marks'] = marks
    param['e_marks'] = marks

    solver = Solver(network, training_data, sv, **param)
    solver.predict()
예제 #18
0
    def _init_seq2seq(self, encoder_inputs, decoder_inputs, cell, feed_previous):
        """Build the encoder RNN and the decoder under the 'seq2seq' scope.

        When *feed_previous* is truthy the decoder is given a loop function
        that projects the previous output with ``self.w_softmax`` /
        ``self.b_softmax`` and turns it into a 0/1 float mask marking the
        row-wise maximum; otherwise no loop function is used.

        Returns whatever ``seq2seq.rnn_decoder`` returns.
        """

        def inference_loop_function(prev, _):
            projected = tf.nn.xw_plus_b(prev, self.w_softmax, self.b_softmax)
            row_max = tf.reduce_max(projected, reduction_indices=[1], keep_dims=True)
            return tf.to_float(tf.equal(projected, row_max))

        if feed_previous:
            loop_function = inference_loop_function
        else:
            loop_function = None

        with variable_scope.variable_scope('seq2seq'):
            _, final_enc_state = rnn.rnn(cell, encoder_inputs, dtype=dtypes.float32)
            decoded = seq2seq.rnn_decoder(decoder_inputs, final_enc_state, cell,
                                          loop_function=loop_function)
            return decoded
def predict(param = PARAMS, sv=SOLVE, small=False):
    """Load a saved network (the ``Slow200`` checkpoint) and run prediction.

    Note that *sv* and *param* are modified in place; with the default
    arguments these are the module-level ``SOLVE`` and ``PARAMS`` objects.
    *small* is accepted but not used.
    """
    # Solver settings: load the checkpoint identified by Slow200.
    sv['load'] = True
    prefix, epoch = Slow200
    sv['load_perfix'] = prefix
    sv['load_epoch'] = epoch
    sv['name'] = 'Pred'

    network = rnn.rnn()

    dataset = get(1, rate=0.1)
    training_data = dataset['train']
    param['eval_data'] = dataset['val']
    marks = dataset['marks']
    param['marks'] = marks
    param['e_marks'] = marks

    solver = Solver(network, training_data, sv, **param)
    solver.predict()
예제 #20
0
    def __init__(self,
                 checkpoint_dir='./checkpoints',
                 bias=0.75, styles_dir='./styles',
                 default_style=None,

                 chars_from='áéíóúñÁÉÍÓÚÑ\t',
                 chars_to='aeiounAEIOUN ',
                 chars_erase='\r',

                 verbose=False):
        """Store the settings, restore the ``rnn`` model and set up the
        character translation.

        Args:
            checkpoint_dir: directory passed to ``rnn`` as ``checkpoint_dir``.
            bias: stored as ``self.bias``.
            styles_dir: stored as ``self.styles_dir``.
            default_style: stored as both ``self.default_style`` and the
                initial ``self.style``.
            chars_from, chars_to, chars_erase: forwarded to
                ``self._update_char_trans_d``.
            verbose: stored as ``self.verbose``.
        """
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

        # Plain settings.
        self.default_style = self.style = default_style
        self.styles_dir = styles_dir
        self.bias = bias
        self.checkpoint_dir = checkpoint_dir
        self.verbose = verbose

        self.valid_char_set = set(drawing.alphabet)

        # Model construction and checkpoint restore.
        settings = {
            'log_dir': 'logs',
            'checkpoint_dir': checkpoint_dir,
            'prediction_dir': 'predictions',
            'learning_rates': [.0001, .00005, .00002],
            'batch_sizes': [32, 64, 64],
            'patiences': [1500, 1000, 500],
            'beta1_decays': [.9, .9, .9],
            'validation_batch_size': 32,
            'optimizer': 'rms',
            'num_training_steps': 100000,
            'warm_start_init_step': 17900,
            'regularization_constant': 0.0,
            'keep_prob': 1.0,
            'enable_parameter_averaging': False,
            'min_steps_to_checkpoint': 2000,
            'log_interval': 20,
            'logging_level': logging.CRITICAL,
            'grad_clip': 10,
            'lstm_size': 400,
            'output_mixture_components': 20,
            'attention_mixture_components': 10,
        }
        self.nn = rnn(**settings)
        self.nn.restore()

        self._update_char_trans_d(chars_from=chars_from,
                                  chars_to=chars_to,
                                  chars_erase=chars_erase)
예제 #21
0
 def __init__(self, vocab_size, n_hidden, pred_rnn_layers,
              forget_gate_bias, norm, rnn_type, dropout):
     """Create the embedding table and the decoder RNN.

     The embedding has ``vocab_size - 1`` rows of width ``n_hidden``; the
     decoder RNN maps ``n_hidden`` inputs to ``n_hidden`` hidden units.
     """
     super().__init__()
     self.n_hidden = n_hidden
     self.embed = torch.nn.Embedding(vocab_size - 1, n_hidden)

     rnn_settings = {
         "rnn": rnn_type,
         "input_size": n_hidden,
         "hidden_size": n_hidden,
         "num_layers": pred_rnn_layers,
         "norm": norm,
         "forget_gate_bias": forget_gate_bias,
         "dropout": dropout,
     }
     self.dec_rnn = rnn(**rnn_settings)
예제 #22
0
 def train_nn(self,
              epochs=5000,
              batch_size=32,
              name="model",
              smoothing_factor=1):
     """Train and test an RNN, keep its prediction and plot a smoothed copy.

     Args:
         epochs: forwarded to ``rnn.rnn``.
         batch_size: forwarded to ``rnn.rnn``.
         name: forwarded to ``rnn.rnn``.
         smoothing_factor: sigma handed to
             ``scipy.ndimage.gaussian_filter`` for the plotted curve.

     Side effects:
         Fits ``self.scaler`` on ``self.x_train``, stores the raw
         (unsmoothed) prediction in ``self.prediction`` and adds a green
         'nn output' line to the current matplotlib plot.
     """
     # Fit the scaler on the training data only and reuse those statistics
     # for the test set.  Re-fitting on the test data would scale the two
     # sets differently and leak test statistics into the evaluation.
     # Assumes the scaler exposes `transform` next to `fit_transform`
     # (scikit-learn style).
     x_train = self.scaler.fit_transform(self.x_train)
     x_test = self.scaler.transform(self.x_test)

     nn_prediction = rnn.rnn(x_train, x_test, self.y_train, self.y_test,
                             self.offset, epochs, batch_size, name)
     self.prediction = nn_prediction

     smoothed = scipy.ndimage.gaussian_filter(nn_prediction,
                                              smoothing_factor)
     plt.plot(smoothed, label='nn output', color="green")
예제 #23
0
파일: ds2.py 프로젝트: aaiijmrtt/DEEPSPEECH
def ds2(config):
	"""Build the 'ds2' model dictionary together with its training ops.

	Layers are wired as cnn -> rnn -> fnn -> ctc.  The function then adds
	the 'loss' (taken from 'ctc_loss'), 'step', 'lrate' and 'optim' entries,
	reading the learning-rate schedule and the optimizer name from the
	'global' section of *config*.
	"""
	# (builder, layer name[, name of the layer feeding it]) in wiring order.
	stages = (
		(cnn.cnn, 'cnn'),
		(rnn.rnn, 'rnn', 'cnn'),
		(fnn.fnn, 'fnn', 'rnn'),
		(ctc.ctc, 'ctc', 'fnn'),
	)

	model = dict()
	for stage in stages:
		build, names = stage[0], stage[1:]
		model = build(model, config, *names)

	model['loss'] = model['ctc_loss']
	model['step'] = tf.Variable(0, trainable = False, name = 'step')

	initial_rate = config.getfloat('global', 'lrate')
	decay_steps = config.getint('global', 'dstep')
	decay_rate = config.getfloat('global', 'drate')
	model['lrate'] = tf.train.exponential_decay(initial_rate, model['step'], decay_steps, decay_rate, staircase = False, name = 'lrate')

	# The optimizer class is looked up by name in tf.train.
	optimizer_cls = getattr(tf.train, config.get('global', 'optim'))
	model['optim'] = optimizer_cls(model['lrate']).minimize(model['loss'], global_step = model['step'], name = 'optim')

	return model
예제 #24
0
 def _predict(self, vocab_size, pred_n_hidden, pred_rnn_layers,
              forget_gate_bias, norm, rnn_type, dropout):
     """Assemble the prediction-network modules into a ``ModuleDict``.

     The dictionary holds an "embed" table with ``vocab_size - 1`` rows of
     width ``pred_n_hidden`` and a "dec_rnn" stack mapping ``pred_n_hidden``
     inputs to ``pred_n_hidden`` hidden units.
     """
     # Created in registration order: embedding first, then the RNN.
     embedding = torch.nn.Embedding(vocab_size - 1, pred_n_hidden)
     decoder_rnn = rnn(
         rnn=rnn_type,
         input_size=pred_n_hidden,
         hidden_size=pred_n_hidden,
         num_layers=pred_rnn_layers,
         norm=norm,
         forget_gate_bias=forget_gate_bias,
         dropout=dropout,
     )
     return torch.nn.ModuleDict({
         "embed": embedding,
         "dec_rnn": decoder_rnn,
     })
def train(param=PARAMS, sv=SOLVE, small=False):
    """Interactively name the run, then train the network and predict.

    The user is asked 'Are you testing now? '.  If the reply contains 'no'
    the 'name' entry is removed from *sv*; otherwise the reply is appended
    to the name 'TEST'.  *sv* and *param* are modified in place; with the
    default arguments these are the module-level ``SOLVE`` and ``PARAMS``
    objects.  *small* is accepted but not used.
    """
    sv['name'] = 'TEST'
    answer = raw_input('Are you testing now? ')

    if 'no' not in answer:
        sv['name'] += answer
    else:
        sv.pop('name')

    network = rnn()

    dataset = get(2, rate=0.2, small=True)
    training_data = dataset['train']
    param['eval_data'] = dataset['val']
    marks = dataset['marks']
    param['marks'] = marks
    param['e_marks'] = marks

    solver = Solver(network, training_data, sv, **param)
    solver.train()
    solver.predict()
예제 #26
0
파일: training.py 프로젝트: Guanghan/ROLO
    def LSTM(self, name,  _X, _istate):
        """Run one LSTM cell over the sequence, one time step per call.

        Returns ``(batch_pred_feats, batch_pred_coords, output_state)``:
        columns 0:4096 and 4097:4101 of the last step's output, and the cell
        state produced by the first step.  *name* is not used.
        """
        # (batchsize, nsteps, len_vec) -> (nsteps, batchsize, len_vec)
        time_major = tf.transpose(_X, [1, 0, 2])
        # -> (nsteps * batchsize, len_vec)
        flat = tf.reshape(time_major, [self.nsteps * self.batchsize, self.len_vec])
        # -> list of nsteps tensors shaped (batchsize, len_vec)
        frames = tf.split(0, self.nsteps, flat)

        lstm_cell = tf.nn.rnn_cell.LSTMCell(self.len_vec, self.len_vec, state_is_tuple = False)

        state = _istate
        for step in range(self.nsteps):
            pred, state = rnn.rnn(lstm_cell, [frames[step]], state, dtype=tf.float32)
            # After the first call the variables exist; reuse them afterwards.
            tf.get_variable_scope().reuse_variables()
            if step == 0:
                output_state = state

        last_output = pred[0]
        batch_pred_feats = last_output[:, 0:4096]
        batch_pred_coords = last_output[:, 4097:4101]
        return batch_pred_feats, batch_pred_coords, output_state
예제 #27
0
    def __init__(self):
        """Load the data, build encoders/decoders, split off a validation
        set and create the model, loss and optimizer.
        """
        datasize = 250  # make this arbitrarily large to use the whole dataset

        print("Loading data...")
        if not STRATIFY_DATA:
            self.data, self.labels = dataset.get_data(datasize)
        else:
            self.data, self.labels = dataset.get_stratified_data(datasize,
                                                                 shuffle=True)

        print("Building encoder...")
        self.data_encoder = OHencoder.map_to_int_ids(self.data, threshold=4)
        self.label_encoder = OHencoder.map_to_int_ids([self.labels])

        # Shift every word id up by one so that index 0 of the one-hot
        # vector stays reserved for unknown words.
        for word in self.data_encoder:
            self.data_encoder[word] += 1

        # Split into training and validation parts; the last VAL_RATIO
        # share of the samples becomes the validation set.
        split_idx = int(len(self.data) * (1 - VAL_RATIO))
        self.data, self.val_data = (self.data[:split_idx],
                                    self.data[split_idx:])
        self.labels, self.val_labels = (self.labels[:split_idx],
                                        self.labels[split_idx:])

        # Inverse mappings: vector index -> word, and index -> label.
        self.data_decoder = {
            index: word for word, index in self.data_encoder.items()
        }
        self.label_decoder = {
            index: label for label, index in self.label_encoder.items()
        }

        self.num_classes = len(self.label_encoder)

        input_size = len(self.data_encoder) + 1
        self.model = rnn(input_size, [128], [128], self.num_classes)

        self.best_acc = 0

        self.criterion = nn.CrossEntropyLoss()
        self.optimizer = torch.optim.Adam(self.model.parameters(),
                                          lr=LEARNING_RATE)
예제 #28
0
def main():
    """Train a small RNN on a 'next index' toy task and print the results.

    Inputs are the ten one-hot rows of a 10x10 identity matrix; the target
    for row i is the one-hot vector of index (i + 1) % 10.  The loss is
    printed after every training iteration, and training stops early (with
    no predictions printed) as soon as the loss becomes NaN.
    """
    num_features = 10
    output_size = 10
    hidden_size = 10
    num_iterations = 1000  # previously 400

    # One-hot inputs, and the same rows shifted one column to the right
    # (wrapping around) as targets.
    X = numpy.eye(10)
    Y = numpy.roll(X, 1, axis=1)

    net = rnn.rnn(num_features, output_size, hidden_size=hidden_size, seed=10)
    for _ in range(num_iterations):
        net.train([X], [Y])
        loss = net.loss_function([X], [Y])
        print(loss)
        if numpy.isnan(loss):
            return

    predictions = net.predict(X)
    for sample, predicted in zip(X, predictions):
        print("%s -> %s" % (sample, predicted))
def train(param = PARAMS, sv=SOLVE, small=False):
    """Interactively name the run, then train the network and predict.

    The user is asked 'Are you testing now? '.  If the reply contains 'no'
    the 'name' entry is removed from *sv*; otherwise the reply is appended
    to the name 'TEST'.  *sv* and *param* are modified in place; with the
    default arguments these are the module-level ``SOLVE`` and ``PARAMS``
    objects.  *small* is accepted but not used.
    """
    sv['name'] = 'TEST'
    answer = raw_input('Are you testing now? ')

    if 'no' not in answer:
        sv['name'] += answer
    else:
        sv.pop('name')

    network = rnn()

    dataset = get(2, rate=0.2, small=True)
    training_data = dataset['train']
    param['eval_data'] = dataset['val']
    marks = dataset['marks']
    param['marks'] = marks
    param['e_marks'] = marks

    solver = Solver(network, training_data, sv, **param)
    solver.train()
    solver.predict()
예제 #30
0
def test_1D_nofilt():
    """Check a 1-D, single-layer network whose weights are forced to 1.

    Every weight is set to 1, then the matrix at index 1 is replaced by
    [[0]].  A random input in [1, 2) is pushed through ``rnn_forward`` and
    'Error' is printed for every returned state that differs from the input.
    """
    ip_dim = op_dim = res_dim = 1
    nrlayers = 1
    seq_len = 10
    net = rnn(nrlayers, ip_dim, res_dim, op_dim)

    # Force every entry to 1 - positive, negative and zero entries in turn.
    for idx in range(len(net.weights)):
        layer = net.weights[idx]
        for compare in (np.greater, np.less, np.equal):
            layer[compare(layer, 0)] = 1
    net.weights[1] = np.array([[0]])

    # Some random input, shifted into [1, 2).
    ip = np.random.rand(ip_dim, seq_len)
    ip += 1

    states = net.rnn_forward(ip)
    for s in states:
        mismatches = np.sum((s != ip) > 0)
        if mismatches:
            print('Error')
예제 #31
0
def RNN(x, weights, biases, n_input):
    """Run a 2-layer GRU over `x` and project the last step's output.

    Args:
        x: input tensor; it is transposed with permutation [1, 0, 2],
            flattened to rows of width `n_input` and split into `n_steps`
            pieces along axis 0.
        weights: output projection matrix.
        biases: output bias.
        n_input: width of one input row.

    Returns:
        A pair ``(logits, outputs)`` where ``outputs`` is the list of
        per-step RNN outputs and
        ``logits = outputs[-1] @ weights + biases``.

    Relies on the module-level ``n_steps`` and ``n_hidden``.
    """
    x = tf.transpose(x, [1, 0, 2])
    # Reshaping to (n_steps*batch_size, n_input)
    x = tf.reshape(tensor=x, shape=[-1, n_input])
    # Split to get a list of 'n_steps' tensors of shape (batch_size, n_input)
    x = tf.split(value=x, num_or_size_splits=n_steps, axis=0)

    def _make_layer():
        """Return one GRU layer wrapped with output dropout."""
        cell = rnn_cell.GRUCell(n_hidden)
        # NOTE(review): the keep probability is fixed at 0.5, so dropout is
        # presumably also active at inference time - confirm intended.
        return rnn_cell.DropoutWrapper(cell, output_keep_prob=0.5)

    # Build a *separate* cell object per layer.  `[cell] * 2` would put the
    # same object into both layers, which newer TensorFlow 1.x releases
    # either reject or treat as weight sharing between the layers.
    stacked_cell = rnn_cell.MultiRNNCell([_make_layer() for _ in range(2)])

    outputs, _ = rnn.rnn(cell=stacked_cell, inputs=x, dtype=tf.float32)
    return tf.matmul(outputs[-1], weights) + biases, outputs
예제 #32
0
파일: training.py 프로젝트: zqcr/ROLO
    def LSTM(self, name, _X, _istate):
        """Run one LSTM cell over the sequence, one time step per call.

        Returns ``(batch_pred_feats, batch_pred_coords, output_state)``:
        columns 0:4096 and 4097:4101 of the last step's output, and the cell
        state produced by the first step.  *name* is not used.
        """
        # (batchsize, nsteps, len_vec) -> (nsteps, batchsize, len_vec)
        time_major = tf.transpose(_X, [1, 0, 2])
        # -> (nsteps * batchsize, len_vec)
        flat = tf.reshape(time_major,
                          [self.nsteps * self.batchsize, self.len_vec])
        # -> list of nsteps tensors shaped (batchsize, len_vec)
        frames = tf.split(0, self.nsteps, flat)

        lstm_cell = tf.nn.rnn_cell.LSTMCell(self.len_vec,
                                            self.len_vec,
                                            state_is_tuple=False)

        state = _istate
        for step in range(self.nsteps):
            pred, state = rnn.rnn(lstm_cell, [frames[step]],
                                  state,
                                  dtype=tf.float32)
            # After the first call the variables exist; reuse them afterwards.
            tf.get_variable_scope().reuse_variables()
            if step == 0:
                output_state = state

        last_output = pred[0]
        batch_pred_feats = last_output[:, 0:4096]
        batch_pred_coords = last_output[:, 4097:4101]
        return batch_pred_feats, batch_pred_coords, output_state
예제 #33
0
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=tf.float32,
                                scope=None, initial_state_attention=False):
  """Embedding sequence-to-sequence model with attention.

  The encoder inputs are embedded with a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]) and run through an RNN; the RNN
  output of every step is kept so the decoder can attend over it. The decoder
  inputs are embedded with a second newly created embedding (of shape
  [num_decoder_symbols x cell.input_size]) and fed to an attention decoder
  that starts from the last encoder state and attends to the encoder outputs.

  Args:
    encoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    decoder_inputs: a list of 1D int32 Tensors of shape [batch_size].
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B. When it
      is None the cell is wrapped in an OutputProjectionWrapper instead.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
      A non-bool value makes the function build both decoder variants and
      select between them with cond.
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".
    initial_state_attention: If False (default), initial attentions are zero.
      If True, initialize the attentions from the initial state and attention
      states.

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder: wrap the cell so that it embeds its inputs, then unroll it.
    embedding_encoder = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    encoder_outputs, encoder_states = rnn.rnn(
        embedding_encoder, encoder_inputs, dtype=dtype)

    # Stack the per-step encoder outputs along a new axis 1; this is what the
    # decoder attends over. This must use the size of the unwrapped cell, so
    # it happens before the optional output-projection wrapping below.
    attention_states = tf.concat(
        1, [tf.reshape(step_output, [-1, 1, cell.output_size])
            for step_output in encoder_outputs])

    # Decoder: project to the symbol space here unless the caller supplied
    # an explicit output projection.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    else:
      output_size = None

    def run_decoder(feed):
      # Build one attention decoder with a fixed Python value of feed_previous.
      return embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection,
          feed, initial_state_attention=initial_state_attention)

    if isinstance(feed_previous, bool):
      return run_decoder(feed_previous)

    # feed_previous is a Tensor: construct both graphs (sharing variables)
    # and let cond pick one of them.
    outputs1, states1 = run_decoder(True)
    tf.get_variable_scope().reuse_variables()
    outputs2, states2 = run_decoder(False)

    outputs = control_flow_ops.cond(feed_previous,
                                    lambda: outputs1, lambda: outputs2)
    states = control_flow_ops.cond(feed_previous,
                                   lambda: states1, lambda: states2)
    return outputs, states
	def __init__(self, config, load_from_arg_dic=None):
		"""Build the sentiment + language-model symbol, bind it and set up SGD.

		Two stacked LSTM networks are unrolled over ``config.seq_len`` steps:
		a sentiment network fed with the ``t%d_senti`` variables, and a
		language-model network fed at every step with the pair
		[``t%d_data``, sentiment output of that step].  Both start from the
		same ``%d_init_c`` / ``%d_init_h`` variables.  Each network ends in a
		fully connected layer followed by ``SoftmaxMaskOutput``.  The grouped
		symbol is bound with ``simple_bind``; parameters are initialised with
		a uniform initializer and then optionally overwritten from
		``load_from_arg_dic``.  Handles to the input, mask, label, state and
		output arrays are stored on ``self``.

		Args:
			config: object providing ``batch_size``, ``seq_len``,
				``max_grad_norm``, ``hidden_size``, ``ctx``, ``vocab_size``,
				``num_label``, ``num_layers``, ``dropout``, ``init_scale``,
				``learning_rate``, ``decay_when`` and ``lr_decay``.
			load_from_arg_dic: optional mapping from argument name to an
				array exposing ``copyto``.  Every entry whose name is also an
				argument of the bound symbol is copied into that argument.
				``None`` (the default) loads nothing.
		"""
		if load_from_arg_dic is None:
			load_from_arg_dic = {}
		self.batch_size = batch_size = config.batch_size
		self.seq_len = seq_len = config.seq_len
		self._max_grad_norm = config.max_grad_norm
		size = config.hidden_size
		ctx = config.ctx
		vocab_size = config.vocab_size
		num_label = config.num_label
		self._logger = logger = Ylogger.Ylogger("test", "log/text.txt")

		# ---- build graph ----
		# The first layer of each stack gets no dropout; the others use
		# config.dropout.
		lstm_cell = []
		for l in range(config.num_layers):
			p = 0 if l == 0 else config.dropout
			lstm_cell.append(rnn_cell.LSTMCell(size, layeridx=l, dropout=p))
		senti_cell = rnn_cell.MultiRNNCell(lstm_cell)

		lstm_cell_extra = []
		for l in range(config.num_layers):
			p = 0 if l == 0 else config.dropout
			lstm_cell_extra.append(
				rnn_cell.LSTMCellWithExtraInput(size, layeridx=l, dropout=p))
		lm_cell = rnn_cell.MultiRNNCell(lstm_cell_extra)

		senti_embed_weight = mx.sym.Variable("senti_embed_weight")
		senti_cls_weight = mx.sym.Variable("senti_cls_weight")
		senti_cls_bias = mx.sym.Variable("senti_cls_bias")
		lm_embed_weight = mx.sym.Variable("lm_embed_weight")
		lm_cls_weight = mx.sym.Variable("lm_cls_weight")
		lm_cls_bias = mx.sym.Variable("lm_cls_bias")
		# One input variable per time step for each of the two input streams.
		data = [mx.sym.Variable("t%d_data" % t)
				for t in xrange(config.seq_len)]
		senti = [mx.sym.Variable("t%d_senti" % t)
				 for t in xrange(config.seq_len)]
		senti_mask = mx.sym.Variable("senti_mask")
		lm_mask = mx.sym.Variable("lm_mask")
		# Parameter lists: embedding weight first, then one dict per layer.
		senti_params = [senti_embed_weight]
		lm_params = [lm_embed_weight]

		for l in xrange(config.num_layers):
			dic = {}
			dic["i2h_weight"] = mx.sym.Variable("senti_l%d_i2h_weight" % l)
			dic["i2h_bias"] = mx.sym.Variable("senti_l%d_i2h_bias" % l)
			dic["h2h_weight"] = mx.sym.Variable("senti_l%d_h2h_weight" % l)
			dic["h2h_bias"] = mx.sym.Variable("senti_l%d_h2h_bias" % l)
			senti_params.append(dic)

		# The language-model layers carry an additional s2h weight/bias pair.
		for l in xrange(config.num_layers):
			dic = {}
			dic["i2h_weight"] = mx.sym.Variable("lm_l%d_i2h_weight" % l)
			dic["i2h_bias"] = mx.sym.Variable("lm_l%d_i2h_bias" % l)
			dic["h2h_weight"] = mx.sym.Variable("lm_l%d_h2h_weight" % l)
			dic["h2h_bias"] = mx.sym.Variable("lm_l%d_h2h_bias" % l)
			dic["s2h_weight"] = mx.sym.Variable("lm_l%d_s2h_weight" % l)
			dic["s2h_bias"] = mx.sym.Variable("lm_l%d_s2h_bias" % l)
			lm_params.append(dic)

		senti_cell = rnn_cell.EmbeddingWrapper(senti_cell, size)
		lm_cell = rnn_cell.EmbeddingWrapperWithExtraInput(lm_cell, size)

		# Both networks are unrolled from the same initial-state variables.
		initial_state = [(mx.sym.Variable("%d_init_c" % i),
						  mx.sym.Variable("%d_init_h" % i))
						 for i in xrange(config.num_layers)]
		senti_outputs, senti_states = rnn.rnn(senti_cell, senti,
											  {"cell": senti_params,
											   "initial_state": initial_state})
		# Each language-model step receives [data_t, senti_output_t].
		lm_outputs, lm_states = rnn.rnn(lm_cell,
										map(list, zip(data, senti_outputs)),
										{"cell": lm_params,
										 "initial_state": initial_state})

		# Concatenate the per-step outputs along dim 0 before classification.
		senti_ct = mx.sym.Concat(*senti_outputs, dim=0)
		lm_ct = mx.sym.Concat(*lm_outputs, dim=0)
		if config.dropout > 0.:
			senti_ct = mx.sym.Dropout(data=senti_ct, p=config.dropout)
			lm_ct = mx.sym.Dropout(data=lm_ct, p=config.dropout)
		senti_fc = mx.sym.FullyConnected(data=senti_ct,
										 weight=senti_cls_weight,
										 bias=senti_cls_bias,
										 num_hidden=num_label)
		lm_fc = mx.sym.FullyConnected(data=lm_ct,
									  weight=lm_cls_weight,
									  bias=lm_cls_bias,
									  num_hidden=vocab_size)
		senti_label = mx.sym.Variable("senti_label")
		senti_sm = mx.sym.SoftmaxMaskOutput(data=senti_fc,
											label=senti_label,
											mask=senti_mask,
											name="senti_sm")
		lm_label = mx.sym.Variable("lm_label")
		lm_sm = mx.sym.SoftmaxMaskOutput(data=lm_fc,
										 label=lm_label,
										 mask=lm_mask,
										 name="lm_sm")
		# Expose the last-step (c, h) of every layer as gradient-blocked
		# outputs so they can be read back after a forward pass.
		senti_unpack_c = []
		senti_unpack_h = []
		for i, (c, h) in enumerate(senti_states[-1]):
			senti_unpack_c.append(
				mx.sym.BlockGrad(c, name="senti_l%d_last_c" % i))
			senti_unpack_h.append(
				mx.sym.BlockGrad(h, name="senti_l%d_last_h" % i))

		lm_unpack_c = []
		lm_unpack_h = []
		for i, (c, h) in enumerate(lm_states[-1]):
			lm_unpack_c.append(
				mx.sym.BlockGrad(c, name="lm_l%d_last_c" % i))
			lm_unpack_h.append(
				mx.sym.BlockGrad(h, name="lm_l%d_last_h" % i))
		rnn_sym = mx.sym.Group([senti_sm] + [lm_sm] +
							   senti_unpack_c + senti_unpack_h +
							   lm_unpack_c + lm_unpack_h)
		#dot = visualization.plot_network(rnn_sym)
		#dot.render("ptb.gv", view=True)

		# ---- produce interfaces for outside ----
		logger(rnn_sym.list_arguments())
		logger(rnn_sym.list_outputs())
		arg_names = rnn_sym.list_arguments()
		# Only the shapes of the inputs are given explicitly; every other
		# argument is left out of input_shapes.
		input_shapes = {}
		for name in arg_names:
			if name.endswith("init_c") or name.endswith("init_h"):
				input_shapes[name] = (batch_size, size)
			elif name.endswith("data"):
				input_shapes[name] = (batch_size, vocab_size)
			elif name.endswith("senti"):
				# NOTE(review): width 2 is hard-coded here while the
				# classifier uses config.num_label -- confirm this is intended.
				input_shapes[name] = (batch_size, 2)
		# "add" is passed as the second positional argument of simple_bind
		# (presumably grad_req, i.e. gradients accumulate -- TODO confirm).
		self._rnn_exec = rnn_exec = rnn_sym.simple_bind(ctx, "add", **input_shapes)

		arg_dict = dict(zip(arg_names, rnn_exec.arg_arrays))
		self.arg_dict = arg_dict
		# (parameter array, gradient array) pairs for every trainable argument.
		self._param_blocks = []
		initializer = mx.initializer.Uniform(config.init_scale)
		for i, name in enumerate(arg_names):
			if is_param_name(name):
				#logger("init "+name)
				initializer(name, arg_dict[name])
				self._param_blocks.append((arg_dict[name],
										   rnn_exec.grad_arrays[i]))
		# Values supplied by the caller override the random initialisation.
		for name in arg_names:
			if name in load_from_arg_dic:
				logger("load parameter" + name)
				load_from_arg_dic[name].copyto(arg_dict[name])

		self._seq_data = [arg_dict["t%d_data" % t] for t in xrange(seq_len)]
		self._seq_senti = [arg_dict["t%d_senti" % t] for t in xrange(seq_len)]
		self._senti_mask = arg_dict["senti_mask"]
		self._lm_mask = arg_dict["lm_mask"]
		self._init_states = [(arg_dict["%d_init_c" % l],
							  arg_dict["%d_init_h" % l])
							 for l in xrange(config.num_layers)]
		out_dict = dict(zip(rnn_sym.list_outputs(), rnn_exec.outputs))
		self._senti_last_states = [(out_dict["senti_l%d_last_c_output" % l],
									out_dict["senti_l%d_last_h_output" % l])
								   for l in xrange(config.num_layers)]
		self._lm_last_states = [(out_dict["lm_l%d_last_c_output" % l],
								 out_dict["lm_l%d_last_h_output" % l])
								for l in xrange(config.num_layers)]
		self._senti_labels = arg_dict["senti_label"]
		self._lm_labels = arg_dict["lm_label"]
		self._senti_outputs = out_dict["senti_sm_output"]
		self._lm_outputs = out_dict["lm_sm_output"]

		# Plain SGD (no weight decay, no momentum) plus learning-rate decay
		# bookkeeping.
		self._opt = mx.optimizer.create("sgd", wd=0, momentum=0,
										learning_rate=config.learning_rate)
		self._last_loss = 1e10
		self._updater = mx.optimizer.get_updater(self._opt)
		self._decay_when = config.decay_when
		self._lr_decay = config.lr_decay
예제 #35
0
def embedding_attention_seq2seq(encoder_inputs, decoder_inputs, cell,
                                num_encoder_symbols, num_decoder_symbols,
                                num_heads=1, output_projection=None,
                                feed_previous=False, dtype=tf.float32,
                                scope=None):
  """Embedding sequence-to-sequence model with attention.

  The encoder inputs are embedded with a newly created embedding (of shape
  [num_encoder_symbols x cell.input_size]) and run through an RNN; the RNN
  output of every step is kept so the decoder can attend over it. The decoder
  inputs are embedded with a second newly created embedding (of shape
  [num_decoder_symbols x cell.input_size]) and fed to an attention decoder
  that starts from the last encoder state and attends to the encoder outputs.

  Args:
    encoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      NOTE(review): the inputs are passed through an EmbeddingWrapper, which
      suggests integer symbol ids rather than dense vectors -- confirm.
    decoder_inputs: a list of 2D Tensors [batch_size x cell.input_size].
      NOTE(review): same caveat as for encoder_inputs -- confirm.
    cell: rnn_cell.RNNCell defining the cell function and size.
    num_encoder_symbols: integer; number of symbols on the encoder side.
    num_decoder_symbols: integer; number of symbols on the decoder side.
    num_heads: number of attention heads that read from attention_states.
    output_projection: None or a pair (W, B) of output projection weights and
      biases; W has shape [cell.output_size x num_decoder_symbols] and B has
      shape [num_decoder_symbols]; if provided and feed_previous=True, each
      fed previous output will first be multiplied by W and added B. When it
      is None the cell is wrapped in an OutputProjectionWrapper instead.
    feed_previous: Boolean or scalar Boolean Tensor; if True, only the first
      of decoder_inputs will be used (the "GO" symbol), and all other decoder
      inputs will be taken from previous outputs (as in embedding_rnn_decoder).
      If False, decoder_inputs are used as given (the standard decoder case).
      A non-bool value makes the function build both decoder variants and
      select between them with cond.
    dtype: The dtype of the initial RNN state (default: tf.float32).
    scope: VariableScope for the created subgraph; defaults to
      "embedding_attention_seq2seq".

  Returns:
    outputs: A list of the same length as decoder_inputs of 2D Tensors with
      shape [batch_size x num_decoder_symbols] containing the generated outputs.
    states: The state of each decoder cell in each time-step. This is a list
      with length len(decoder_inputs) -- one item for each time-step.
      Each item is a 2D Tensor of shape [batch_size x cell.state_size].
  """
  with tf.variable_scope(scope or "embedding_attention_seq2seq"):
    # Encoder: wrap the cell so that it embeds its inputs, then unroll it.
    embedding_encoder = rnn_cell.EmbeddingWrapper(cell, num_encoder_symbols)
    encoder_outputs, encoder_states = rnn.rnn(
        embedding_encoder, encoder_inputs, dtype=dtype)

    # Stack the per-step encoder outputs along a new axis 1; this is what the
    # decoder attends over. This must use the size of the unwrapped cell, so
    # it happens before the optional output-projection wrapping below.
    attention_states = tf.concat(
        1, [tf.reshape(step_output, [-1, 1, cell.output_size])
            for step_output in encoder_outputs])

    # Decoder: project to the symbol space here unless the caller supplied
    # an explicit output projection.
    if output_projection is None:
      cell = rnn_cell.OutputProjectionWrapper(cell, num_decoder_symbols)
      output_size = num_decoder_symbols
    else:
      output_size = None

    def run_decoder(feed):
      # Build one attention decoder with a fixed Python value of feed_previous.
      return embedding_attention_decoder(
          decoder_inputs, encoder_states[-1], attention_states, cell,
          num_decoder_symbols, num_heads, output_size, output_projection,
          feed)

    if isinstance(feed_previous, bool):
      return run_decoder(feed_previous)

    # feed_previous is a Tensor: construct both graphs (sharing variables)
    # and let cond pick one of them.
    outputs1, states1 = run_decoder(True)
    tf.get_variable_scope().reuse_variables()
    outputs2, states2 = run_decoder(False)

    outputs = tf.control_flow_ops.cond(feed_previous,
                                       lambda: outputs1, lambda: outputs2)
    states = tf.control_flow_ops.cond(feed_previous,
                                      lambda: states1, lambda: states2)
    return outputs, states
예제 #36
0
    X, Y = pickle.load(fp)

# Give the last `nb_unknown_words` vocabulary indices placeholder names
# '<0>', '<1>', ...; index vocab_size - 1 maps to '<0>' and the numbering
# grows as the index decreases.
for i in range(nb_unknown_words):
    idx2word[vocab_size - 1 - i] = '<%d>' % i

# Lowest index of the placeholder range assigned above.
oov0 = vocab_size - nb_unknown_words

# Hold out `nb_val_samples` examples for testing; `seed` fixes the split.
X_train, X_test, Y_train, Y_test = train_test_split(X,
                                                    Y,
                                                    test_size=nb_val_samples,
                                                    random_state=seed)

# Printable names for the `empty` and `eos` indices.
idx2word[empty] = '_'
idx2word[eos] = '~'

# Build the base model with the project's `rnn` factory, then append a
# per-time-step Dense layer of `vocab_size` units followed by a softmax.
model = rnn(vocab_size, embedding_size, maxlen, embedding, maxlend, maxlenh)
model.add(
    TimeDistributed(
        Dense(vocab_size,
              kernel_regularizer=regularizer,
              bias_regularizer=regularizer,
              name='timedistributed_1')))
model.add(Activation('softmax', name='activation_1'))

model.compile(loss='categorical_crossentropy', optimizer=optimizer)
# Set the optimizer's learning rate to LR after compilation.
K.set_value(model.optimizer.lr, np.float32(LR))
model.summary()

# Disabled on purpose: change the condition to resume from saved weights.
if False:
    model.load_weights('model.hdf5')
예제 #37
0
	def init_rnn(self,L1,L2):
		"""Create the RNN model and store it on ``self.rnn_model``.

		``rnn`` is called with the fixed value 70, ``self.length``, the fixed
		value 4, and then ``L1`` and ``L2``, in that order.
		NOTE(review): the meaning of 70 / 4 and of ``L1`` / ``L2`` is not
		visible from here -- confirm against the ``rnn`` constructor.
		"""
		model = rnn(70, self.length, 4, L1, L2)
		self.rnn_model = model
예제 #38
0
def main():
    """Create an ``rnn.rnn`` object and call its ``request`` method."""
    rnn.rnn().request()
예제 #39
0
########## Normalize time-series values #############
# Standardise with the mean / standard deviation of the whole series, then
# keep only the first 1000 normalised samples.
mean = np.mean(time_series_data)
stdev = np.sqrt(np.var(time_series_data))
time_series_data_normalized = ((time_series_data-mean)/stdev)[:1000]

################## Data preparation ############################

# Column 0 is used both as input and as target.
X_column_list = [0]
y_column_list = [0]
number_of_delays = 8
test_fraction = 0.5

# NOTE(review): presumably builds delayed-input samples and splits off
# `test_fraction` of them for testing -- confirm against data_prep.prepare.
X_train,y_train,X_test,y_test = data_prep.prepare(time_series_data_normalized,X_column_list,y_column_list,number_of_delays,test_fraction)

########### Model RNN using rnn class ###############
rnn1 = rnn.rnn()
rnn1.initialize(X_train, y_train,hidden_units_1=3,hidden_units_2=3,activation_1='tanh',activation_2='tanh')
rnn1.get_parameters()

# Starting parameter vector, taken from the values of rnn1.parameters.
initial_theta=np.asarray(list(rnn1.parameters.values()))
# train() returns the training-loss and validation-loss histories.
(loss_vector,loss_val_vector)=rnn1.train(rnn1.X,rnn1.y,initial_theta,train_validation_split=0.7,epochs=10000,restore_best_theta=True)

y_pred=rnn1.predict(X_test)
rnn1.print_dashed_line()
print("Test error:")
# Mean squared error between column 0 of the targets and of the predictions.
print(metrics.mean_squared_error(y_test[:,0],y_pred[:,0]))
rnn1.print_dashed_line()

############ Plotting loss function, attractor and time-series predictions ###############
print("Plotting training and validation losses...")
plt.plot(loss_vector,label="Training")
예제 #40
0
	def init_rnn_model(self,num_hidden,L1,L2):
		"""Create the RNN model and store it on ``self.rnn_model``.

		``rnn`` is called with ``num_hidden``, the number of entries in
		``self.dic`` (passed twice), and then ``L1`` and ``L2``.
		NOTE(review): the two ``len(self.dic)`` arguments are presumably the
		input and output sizes -- confirm against the ``rnn`` constructor.
		"""
		dic_size = len(self.dic)
		self.rnn_model = rnn(num_hidden, dic_size, dic_size, L1, L2)