Beispiel #1
0
def main():
    """Rebuild a cleaned series from evaluation segments and plot it.

    Command line: ``model_name dataset original_dataset``.

    The dataset is normalized and split into ``(head, tail)`` evaluation
    pairs.  For every pair the head is differenced, the model produces a
    projection from it, and both are integrated back and appended to the
    running ``clean`` series.  The de-normalized result is finally plotted
    next to the original dataset, with vertical lines at the cut indexes.

    Raises:
        SystemExit: with a usage message (exit status 1) when the number of
            command line arguments is wrong.
    """
    argv = sys.argv

    if len(argv) != 4:
        # A usage error must not report success to the calling shell:
        # sys.exit(<str>) prints the message to stderr and exits with 1.
        sys.exit('Usage: ' + argv[0] + ' model_name dataset original_dataset')

    model_name = argv[1]
    data = datautils.load(argv[2])
    original_data = datautils.load(argv[3])
    normalized, mean, std = datautils.normalize(data)

    eval_sequences, cuts_indexes = split_evaluation_sequences(normalized)

    # eval
    model = LSTM()

    clean = np.empty(0)
    for head, tail in eval_sequences:
        # Each segment is anchored on the last value rebuilt so far
        # (0 for the very first segment).
        y_init = clean[-1] if len(clean) else 0
        head_diff = datautils.differentiate(head, y_init)

        projection = model.evaluate(model_name, head_diff, tail)

        # Integrate the head back, then continue the projection from the
        # head's final value so both pieces join without a jump.
        head = datautils.undifferentiate(head_diff, y_init)
        projection = datautils.undifferentiate(projection, head[-1])

        clean = np.concatenate((clean, head, projection))

    # plot
    clean_denorm = datautils.denormalize(clean, mean, std)
    utils.plot_multiple([original_data, clean_denorm], [0, 0],
                        vertical_lines=cuts_indexes)
Beispiel #2
0
    def __init__(self,
                 input_size: int = INPUT_SIZE,
                 output_size: int = OUTPUT_SIZE,
                 hidden_size: int = HIDDEN_SIZE,
                 embed_size: int = EMBED_SIZE,
                 lr: float = LEARNING_RATE,
                 clip_grad: float = CLIP_GRAD,
                 init_range: float = INIT_RANGE):
        """Build the input-side and output-side layer stacks.

        Args:
            input_size: first argument of the input-side ``Embedding``.
            output_size: first argument of the output-side ``Embedding`` and
                last size argument of the ``Softmax`` layer.
            hidden_size: size passed to both ``LSTM`` layers and ``Softmax``.
            embed_size: size shared by both ``Embedding`` layers.
            lr: stored on the instance as ``self.lr``.
            clip_grad: stored on the instance as ``self.clip_grad``.
            init_range: forwarded to every layer; not stored on the instance.
        """
        # Input side: embedding followed by an LSTM.
        source_embedding = Embedding(input_size, embed_size, init_range)
        source_lstm = LSTM(embed_size, hidden_size, init_range)

        # Output side: its LSTM is linked to the input-side LSTM through
        # the `previous` argument.
        target_embedding = Embedding(output_size, embed_size, init_range)
        target_lstm = LSTM(embed_size, hidden_size, init_range,
                           previous=source_lstm)
        classifier = Softmax(hidden_size, output_size, init_range)

        self.input_layers = [source_embedding, source_lstm]
        self.output_layers = [target_embedding, target_lstm, classifier]

        self.hidden_size, self.embed_size = hidden_size, embed_size
        self.input_size, self.output_size = input_size, output_size
        self.lr, self.clip_grad = lr, clip_grad
    def __init__(self,
                 input_dim,
                 hidden_dim,
                 output_dim,
                 layers=1,
                 bidirectional=False,
                 layernorm=False):
        """Create the recurrent core and the linear output layer.

        Args:
            input_dim: forwarded to ``LSTM`` as ``input_dim``.
            hidden_dim: forwarded to ``LSTM`` as ``hidden_dim``.
            output_dim: number of output features of the final linear layer.
            layers: forwarded to ``LSTM`` as ``layers``.
            bidirectional: forwarded to ``LSTM``; when true the linear layer
                takes ``2 * hidden_dim`` input features.
            layernorm: forwarded to ``LSTM`` as ``layernorm``.
        """
        super().__init__()
        self.name = 'lstm'

        # Keep the configuration on the instance.
        self.input_dim, self.hidden_dim, self.output_dim = (
            input_dim, hidden_dim, output_dim)
        self.layers, self.bidirectional, self.layernorm = (
            layers, bidirectional, layernorm)

        self.lstm = LSTM(input_dim=input_dim,
                         hidden_dim=hidden_dim,
                         layers=layers,
                         bidirectional=bidirectional,
                         layernorm=layernorm)

        # A bidirectional core doubles the width fed to the output layer.
        fc_in_features = 2 * hidden_dim if self.bidirectional else hidden_dim
        self.fc = nn.Linear(fc_in_features, output_dim)
Beispiel #4
0
 def create_model(self):
     """Log the vocabulary size and instantiate ``self.model``.

     The model receives ``len(self.vocab_to_ints) + 1`` as its vocabulary
     size (presumably one extra slot for a padding/unknown id -- confirm).
     """
     self.log('Creating model')
     n_tokens = len(self.vocab_to_ints)
     self.log('vocab size : ' + str(n_tokens))
     # The original author left a disabled `.to(device)` on this call.
     self.model = LSTM(
         input_units=self.maxlen,
         hidden_units=self.hidden_dim,
         vocab_size=n_tokens + 1,
         embedding_dim=self.embedding_size,
     )
Beispiel #5
0
def courseratest():
    """Run a seeded forward pass and four backward steps through the cell.

    Random inputs are drawn with a fixed seed, pushed through
    ``net.forward`` and then the last four time steps are back-propagated
    through ``net.cell.backward`` while the per-gate gradients are summed.
    The bias gradient of gate ``'f'`` is printed after every step.
    """
    np.random.seed(1)
    x_dim, n_examples, time_steps, hidden_dim = 3, 10, 7, 5

    da = np.random.randn(5, 10, 4)
    x = np.ones((time_steps, n_examples, x_dim))
    # Fill element by element in C order so the seeded draw sequence is
    # exactly one scalar draw per entry, last axis varying fastest.
    for index in np.ndindex(time_steps, n_examples, x_dim):
        x[index] = np.random.randn()
    from functions import xavier_init  # unused here; import kept as-is
    net = LSTM(hidden_dim, x_dim)
    zero_targets = np.zeros((time_steps, n_examples, 1))
    states, caches, preds, ys = net.forward(x, zero_targets)

    # Gradients flowing in from "after the last step" start at zero.
    da_next = np.zeros_like(da[:, :, 0])
    dc_next = np.zeros_like(states[0]['c'])
    grads = net.cell.init_grads()
    for t in range(3, -1, -1):
        upstream = da[:, :, t] + da_next
        da_next, dc_next, grad_adds = net.cell.backward(
            states[t], upstream, dc_next)
        for gate in ('c', 'u', 'o', 'f'):
            for param in ('w', 'b'):
                grads[gate][param] += grad_adds[gate][param]
        print(grad_adds['f']['b'])
Beispiel #6
0
def compareFixed():
    """Compare HNM, NTM and LSTM on 100 samples of ``sequence_type_1``.

    Each model is restored from disk and run sample by sample.  The absolute
    distance between the arg-max of the target and the arg-max of each
    model's output is accumulated and the three per-sample averages are
    printed (HNM, NTM, LSTM).
    """
    tasks = Tasks()
    x_test, y_test = tasks.sequence_type_1(100)

    add_params = torch.load('program_memory/add.pt')
    mul_params = torch.load('program_memory/mul.pt')
    hnm = HNM(10, 20, add_params, mul_params)
    hnm.load_state_dict(torch.load("learned_params/hnm_arch_2.pt"))

    ntm = NTM(10, 20)
    ntm.load_state_dict(torch.load("learned_params/ntm.pt"))

    lstm = LSTM(14, 256, 325, 1)
    lstm.load_state_dict(torch.load("learned_params/lstm.pt"))

    def predicted_index(tensor):
        # Position of the largest entry of a tensor, via NumPy.
        return np.argmax(tensor.detach().numpy())

    hnm_diff = ntm_diff = lstm_diff = 0

    for i in range(len(x_test)):
        sample = x_test[i:i + 1]  # slice keeps the leading batch axis
        hnm_out = hnm.recurrent_forward(sample)
        ntm_out = ntm.recurrent_forward(sample)
        lstm_out = lstm.recurrent_forward(sample)

        answer = predicted_index(y_test[i:i + 1])
        hnm_diff += abs(answer - predicted_index(hnm_out))
        ntm_diff += abs(answer - predicted_index(ntm_out))
        lstm_diff += abs(answer - predicted_index(lstm_out))

    n_targets = len(y_test)
    print(hnm_diff / n_targets, ntm_diff / n_targets, lstm_diff / n_targets)
    def __init__(self, is_training=False):
        """Prepare data sets, the preprocessor and the LSTM.

        Every stage prints a timestamped progress line before it runs.

        Args:
            is_training: when true, the preprocessor's data scores are
                updated from the SNLI dataset file and kept on the instance;
                otherwise those two attributes stay ``None``.
        """
        def _announce(message):
            # Prefix the progress message with the current local time.
            stamp = datetime.datetime.fromtimestamp(time.time()).strftime(
                '%Y-%m-%d %H:%M:%S')
            print(stamp + message)

        _announce(": Checking for data sets, downloading if needed...")
        data.check_all_unzip()

        _announce(": Initializing preproc...")
        self.__preproc = Preprocessor(evidence_length=EVIDENCE_LENGTH,
                                      hypothesis_length=HYPOTHESIS_LENGTH,
                                      vector_size=VECTOR_SIZE)

        _announce(": Processing GloVe Vector Data...")
        self.__preproc.setup_word_map(file=datasets.glove_vectors_840B_300d)
        self.__df_list = None
        self.__c_scores = None

        if is_training:
            _announce(": Updating data scores for training...")
            scores = self.__preproc.update_data_scores(
                file=datasets.snli_full_dataset_file)
            self.__df_list, self.__c_scores = scores

        _announce(": Initializing LSTM...")
        # The LSTM is sized from the preprocessor's own settings.
        self.__lstm = LSTM(e_length=self.__preproc.get_evidence_length(),
                           h_length=self.__preproc.get_hypothesis_length(),
                           v_size=self.__preproc.get_vector_size())
Beispiel #8
0
def simplefunc():
    """Fit the LSTM to a constant input with a step-shaped target.

    The input is all ones; the target is ``1 / 4.5`` for the first four time
    steps and 3.6 times that afterwards.  Training runs for 5000 epochs of
    mini-batches and prints the summed mean batch loss and the epoch
    duration on a single, continuously overwritten line.
    """
    time_steps, x_dim, hidden_dim, output_dim = 10, 8, 8, 8
    n_examples, batch_size = 2048, 256

    # These two random draws are overwritten right below; they are kept
    # because they still advance NumPy's global random state.
    x = np.random.randn(time_steps, n_examples, x_dim)
    y = np.random.randn(time_steps, n_examples, output_dim)

    x = np.ones((time_steps, n_examples, x_dim))
    y = np.ones((time_steps, n_examples, output_dim)) / 4.5
    y[4:] *= 3.6  # raise the target from time step 4 onwards

    net = LSTM(hidden_dim, x_dim, output_dim=output_dim, learning_rate=1e-5)
    losses = []
    for epoch in range(5000):
        started = time.time()
        epoch_loss = sum(
            np.mean(net.fit(data, targets))
            for data, targets in minibatch_gen(x, y, batch_size))
        losses.append(epoch_loss)
        elapsed = time.time() - started
        print('Epoch {}: loss: {} time: {}'.format(epoch, epoch_loss, elapsed),
              end='\r',
              flush=True)

    # Repeat the final epoch's figures on a fresh line.
    elapsed = time.time() - started
    print('\nEpoch {}: loss: {} time: {}'.format(epoch, epoch_loss, elapsed),
          end='\r',
          flush=True)
Beispiel #9
0
 def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise()):
     """Create the two stacked LSTMs and the transition/emission parts.

     ``forward`` is built from ``(n_in, units)`` and ``backward`` from
     ``(units, n_topics)``.  The default ``noise`` object is created once,
     when the method is defined, and is therefore shared by every instance
     that does not pass its own.
     """
     forward_net = LSTM(n_in, units)
     backward_net = LSTM(units, n_topics)
     self.forward, self.backward = forward_net, backward_net

     self.trans = DirichletTransition(n_topics)
     self.emit = Emmission(n_topics, n_in)

     self.sparsity, self.noise = sparsity, noise
Beispiel #10
0
def main():
    """Train the LSTM on a differenced series and plot a test projection.

    Command line: ``model_name dataset``.

    The dataset is normalized, differenced and split 70/30 into train and
    test parts.  After training, the first 60% of the test part is given to
    the model, which projects the remaining points.  Test data and
    projection are integrated back, de-normalized and plotted together.

    Raises:
        SystemExit: with a usage message (exit status 1) when the number of
            command line arguments is wrong.
    """
    argv = sys.argv

    if len(argv) != 3:
        # A usage error must not report success to the calling shell:
        # sys.exit(<str>) prints the message to stderr and exits with 1.
        sys.exit('Usage: ' + argv[0] + ' model_name dataset')

    model_name = argv[1]
    data = datautils.load(argv[2])

    normalized, mean, std = datautils.normalize(data)
    normalized, _ = datautils.differentiate(normalized)
    (train, test) = datautils.split(normalized, 0.7)

    # utils.plot_data(data)

    print("training set length: {}".format(len(train)))
    print("test set length: {}".format(len(test)))

    # train
    model = LSTM()
    time_steps = 20  # window size
    batch_size = 5  # data augmentation
    history = model.train(model_name, train, 130, batch_size, time_steps)
    utils.plot_history(history)

    # test
    head = int(len(test) * 0.6)
    tail = len(test) - head
    projection = model.evaluate(model_name, test[:head], tail)

    # plot
    # The train sum is the starting level for the integrated test data;
    # compute it once, `train` does not change in between.
    train_total = sum(train)
    test = datautils.undifferentiate(test, train_total)
    # NOTE: `test` is already integrated here, exactly as in the original.
    projection = datautils.undifferentiate(projection,
                                           train_total + sum(test))
    testset_denorm = datautils.denormalize(test, mean, std)
    results_denorm = datautils.denormalize(projection, mean, std)
    utils.plot_multiple([testset_denorm, results_denorm], [0, head + 1])
def pipeline(output_window):
    '''
    Train (or restore) the LSTM price model, then backtest the portfolio
    optimization on the test windows.

    Relies on the names ``net_config``, ``config_proto`` and ``scaler``
    that are defined outside this function.

    :param output_window: time windows length of LSTM predictions
    :return: tuple ``(np.array(performances), all_weights, pnls)``
    '''
    data_autoen = get_data(scale=True)
    train_data, test_data, train_labels, test_labels = get_train_test(data_autoen,
                                                                      windows_size=net_config.windows_size,
                                                                      output_window=output_window)
    #data = get_data(windows_size=net_config.windows_size)
    # Inputs and labels are batched separately with the same batch size.
    batches = create_batches(train_data, batch_size=net_config.batch_size)
    labels_batches = create_batches(train_labels, batch_size=net_config.batch_size)

    # Build the model in a fresh graph so nothing leaks from earlier runs.
    with tf.Graph().as_default():
        # Input and output width are both the last dimension of the data.
        lstm_model = LSTM(input_size=data_autoen.shape[-1],
                          output_size=data_autoen.shape[-1],
                          rnn_hidden=net_config.rnn_hidden,
                          window_size=net_config.windows_size,
                          window_output=output_window,
                          learning_rate=net_config.learning_rate,
                          )
        with tf.Session(config=config_proto) as sess:
            if net_config.lstm_from_file:
                # Reuse the most recent checkpoint instead of training.
                lstm_model.saver.restore(sess, tf.train.latest_checkpoint('lstm-price/'))
            else:
                LSTM.train_lstm(sess, lstm_model, batches, labels_batches, test_data, test_labels,
                                net_config.lstm_epochs, net_config.dropout_prob)
                lstm_model.saver.save(sess, "lstm-price/model", global_step=net_config.lstm_epochs)
            # The string literal below is disabled plotting code; it is a
            # no-op expression statement.
            '''
            feed_dict = {
                lstm_model.inputs: test_data[0:1, :, :],
                lstm_model.dropout_prob: 1.0
            }
            
            preds = sess.run(lstm_model.predicted_outputs, feed_dict=feed_dict)
            n_stocks = data_autoen.shape[-1]
            fig, axes = plt.subplots(int(n_stocks / 2), 2)
            i = 0
            for n in range(int(n_stocks / 2)):
                for j in range(2):
                    axes[n, j].plot(preds[0, :, i], label='predicted')
                    axes[n, j].plot(test_labels[0, :, i], label='true')
                    i += 1
            plt.legend()
            plt.show()
            '''

            # Last step of every test window, mapped back through the
            # scaler and turned into period-over-period percentage changes.
            df_close = pd.DataFrame(test_data[:, -1, :])
            df_close = pd.DataFrame(scaler.inverse_transform(df_close))
            df_close = df_close.pct_change().dropna()

            train_dataset = concat(train_data, train_labels)
            all_weights, pnls = backtesting_optim_portfolio(
                df_close, test_data, test_labels, train_dataset, lstm_model, sess, net_config.risk_aversion, output_window, net_config)
            print(all_weights.shape)

            performances = compute_performance(pnls)
            print(performances)
            # Returning from inside the `with` blocks still closes the session.
            return np.array(performances), all_weights, pnls
Beispiel #12
0
    def __init__(self, args):
        """Build the feature extractor, two stacked LSTMs and the head.

        Args:
            args: configuration object; ``button_num`` and ``variable_num``
                are read from it and it is passed on to ``Features``.
        """
        super().__init__()
        self.features = Features(args)

        # Width of the vector fed to the first LSTM and to the batch norm.
        feature_width = Features.size + args.button_num
        hidden = self.rnn_size  # not set here; expected on the class

        self.rnn1 = LSTM(feature_width, hidden)
        self.rnn2 = LSTM(hidden, hidden)
        self.features_bn = nn.BatchNorm1d(feature_width)

        self.pred = nn.Linear(hidden, args.variable_num * 3)
Beispiel #13
0
def compare():
    """Count per-step arg-max matches of HNM, NTM and LSTM on test data.

    The three models are restored from disk and stepped through the first
    25 test sequences one time step at a time.  A step counts as correct
    when the arg-max of the model output equals the arg-max of the label.
    The three correct counts and the total number of steps are printed.
    """
    obstacle, wall_cw, wall_awc = Obstacle(), WallCW(), WallACW()
    obstacle_params = torch.load('program_memory/move.pt')
    wall_cw_params = torch.load('program_memory/cw.pt')
    wall_acw_params = torch.load('program_memory/acw.pt')
    hnm = HNM(10, 14,
              [obstacle, wall_cw, wall_awc],
              [obstacle_params, wall_cw_params, wall_acw_params])
    hnm.load_state_dict(torch.load('learned_params/hnm.pt'))

    ntm = NTM(10, 14)
    ntm.load_state_dict(torch.load('learned_params/ntm.pt'))

    lstm = LSTM(14, 64, 3, 1)
    lstm.load_state_dict(torch.load('learned_params/lstm.pt'))

    testX, testY = getTestData()

    def top_index(output):
        # Arg-max of a model output tensor, via NumPy.
        return np.argmax(output.detach().numpy())

    hnm_correct = ntm_correct = lstm_correct = 0
    totSamples = 0

    for i in range(25):
        # One sequence with a leading batch axis of size 1.
        s = torch.from_numpy(np.array(testX[i:i + 1][0])).float().unsqueeze(0)
        # The LSTM indexes time on the last axis of this reshaped view.
        s_lstm = s.view(s.size(0), s.size(2), -1)
        l = np.array(testY[i:i + 1][0])

        print(i)

        # Fresh memory weights / recurrent state for every sequence.
        hnm_read_weights, hnm_write_weights = hnm._initialise()
        ntm_read_weights, ntm_write_weights = ntm._initialise()
        lstm_h = lstm.h0.expand(s_lstm.size(0), 64)
        lstm_c = lstm.c0.expand(s_lstm.size(0), 64)

        for j in range(s.size(1)):
            frame = s[:, j, :]
            hnm_out, hnm_read_weights, hnm_write_weights = hnm.forward(
                frame, hnm_read_weights, hnm_write_weights)
            ntm_out, ntm_read_weights, ntm_write_weights = ntm.forward(
                frame, ntm_read_weights, ntm_write_weights)
            lstm_h, lstm_c, lstm_out = lstm.forward(
                s_lstm[:, :, j], lstm_h, lstm_c)

            target = np.argmax(l[j])
            hnm_correct += int(top_index(hnm_out) == target)
            ntm_correct += int(top_index(ntm_out) == target)
            lstm_correct += int(top_index(lstm_out) == target)

            totSamples += 1

    print(hnm_correct, ntm_correct, lstm_correct)
    print(totSamples)
Beispiel #14
0
 def __init__(self):
     """Create empty parameters and LSTMs, then register them.

     All members are built without arguments; ``scan_attributes`` is
     called last, after every attribute has been assigned.
     """
     self.dropout_rate = DROPOUT_RATE

     # Lookup tables for source and target side.
     self.psrc_lookup, self.ptrg_lookup = Parameter(), Parameter()
     # Output projection weight and bias.
     self.pwhy, self.pby = Parameter(), Parameter()
     # One recurrent unit per side.
     self.src_lstm, self.trg_lstm = LSTM(), LSTM()

     self.scan_attributes()
Beispiel #15
0
 def create_lstm(self):
     """
     Build the four gate networks and assemble the vanilla LSTM from them.

     The networks are created in the order f, i, c, o; the c network uses a
     tanh layer, the other three a sigmoid layer.
     """
     gate_specs = (
         (layer.SigmoidLayer, constants.SIG_F_POS),
         (layer.SigmoidLayer, constants.SIG_I_POS),
         (layer.TanhLayer, constants.TANH_C_POS),
         (layer.SigmoidLayer, constants.SIG_O_POS),
     )
     nn_f, nn_i, nn_c, nn_o = [
         self.create_neural_network(make_activation(), position)
         for make_activation, position in gate_specs
     ]
     self.lstm = LSTM(nn_f, nn_i, nn_c, nn_o,
                      constants.TAU_QUANTILE, constants.LEARNING_RATE)
Beispiel #16
0
def quandltest():
    """Train the LSTM on differenced INTC/AMD closing prices from Quandl.

    Closing prices of both tickers are fetched, merged on the date, divided
    by the last row and differenced.  The net is then trained on random
    time batches to predict the next step, with a snapshot pickled every
    5000 iterations and both learning rates halved every 3000 iterations
    (including iteration 0).
    """
    LOAD = False

    tickers = ['INTC', 'AMD']
    date = {'gte': '2016-10-10', 'lte': '2017-09-01'}
    columns = {'columns': ['ticker', 'date', 'close']}
    datasets = [
        quandl.get_table('WIKI/PRICES',
                         qopts=columns,
                         ticker=ticker,
                         date=date)
        for ticker in tickers
    ]
    for dataset in datasets:
        # Name the price column after the ticker so the merge keeps both.
        symbol = dataset['ticker'].iloc[0]
        dataset.rename(columns={'close': symbol}, inplace=True)
        dataset.drop('ticker', axis=1, inplace=True)

    df = reduce(lambda left, right: pd.merge(left, right, on='date'),
                datasets)
    df.index = df['date']
    df.drop('date', axis=1, inplace=True)
    # Scale by the last row, then keep step-to-step differences.
    df = (df / df.iloc[-1]).diff()[1:]
    x_dim = output_dim = len(df.iloc[0])

    hidden_dim, time_steps, batch_size = 80, 30, 20
    if LOAD:
        # NOTE: unpickling runs code from the file; only load snapshots
        # this script wrote itself.
        with open('amd_intel_net.pkl', 'rb') as f:
            net = pickle.load(f)
    else:
        net = LSTM(hidden_dim,
                   x_dim,
                   output_dim=output_dim,
                   learning_rate=2e-2)

    for i in range(300000):
        batch = np.array(
            random_time_batch(df,
                              time_steps=time_steps + 1,
                              batch_size=batch_size))
        start = time.time()
        # Inputs are all steps but the last, targets are shifted by one.
        x, y = batch[:-1, :, :], batch[1:, :, :]
        loss = np.sum(net.fit(x, y))
        print('Epoch {}: loss: {} time: {}'.format(i, loss,
                                                   time.time() - start),
              end='\r',
              flush=True)
        if i and not i % 5000:
            with open('amd_intel_net.pkl', 'wb') as f:
                pickle.dump(net, f)
        if not i % 3000:
            for part in (net.cell, net.activation):
                part.learning_rate = part.learning_rate * 0.5
Beispiel #17
0
 def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise(), trans_weight=1.0):
     """Create the stacked LSTMs, linear map, transition and emission parts.

     ``forward`` is built from ``(n_in, units)``, ``backward`` from
     ``(units, n_topics)`` and ``linear`` from ``(n_topics, n_topics)``.
     The default ``noise`` object is created once, when the method is
     defined, and is shared by every instance that does not pass its own.
     """
     forward_net = LSTM(n_in, units)
     backward_net = LSTM(units, n_topics)
     self.forward, self.backward = forward_net, backward_net

     self.linear = Linear(n_topics, n_topics)
     self.trans = DirichletTransition(n_topics)
     self.emit = Emmission(n_topics, n_in)

     # Plain configuration values.
     self.sparsity, self.noise = sparsity, noise
     self.n_topics, self.n_in = n_topics, n_in
     self.trans_weight = trans_weight
Beispiel #18
0
    def _build_model(self):
        """Build the training model.

        User and item inputs are first transformed in the ENCODE scope, then
        fed through one LSTM each, transformed again in the AFFINE scope and
        finally combined in the EMISSION scope into ``self.logits``.

        Raises:
            NotImplementedError: if ``self.loss_function`` is neither
                ``'log_loss'`` nor ``'rmse'``.
        """
        # Each stage lives in its own variable scope; the scope name is also
        # passed to Transform as its phase.
        phase = 'ENCODE'
        with tf.variable_scope(phase):
            self.encode_user = Transform(self.user_input, self.user_hparas,
                                         phase)
            self.encode_item = Transform(self.item_input, self.item_hparas,
                                         phase)

        phase = 'LSTM'
        with tf.variable_scope(phase):
            self.lstm_user = LSTM(self.encode_user.output, self.user_hparas)
            self.lstm_item = LSTM(self.encode_item.output, self.item_hparas)

        phase = 'AFFINE'
        with tf.variable_scope(phase):
            self.trans_user = Transform(self.lstm_user.output,
                                        self.user_hparas, phase)
            self.trans_item = Transform(self.lstm_item.output,
                                        self.item_hparas, phase)

        phase = 'EMISSION'
        with tf.variable_scope(phase):
            # Contract the last axis of both operands; the shared middle
            # axis j becomes the leading axis of the result (j, i, k).
            self.dynamic_state = tf.einsum('ijl,kjl->jik',
                                           self.trans_user.output,
                                           self.trans_item.output,
                                           name='dynamic_state')
            self.stationary_state = tf.matmul(self.user_stationary_factor,
                                              self.item_stationary_factor,
                                              transpose_b=True,
                                              name='stationary_state')

            if self.loss_function == 'log_loss':
                # Average of both states, squashed with a sigmoid.
                logits = tf.add(self.dynamic_state * 0.5,
                                self.stationary_state * 0.5,
                                name='logits')
                self.logits = tf.nn.sigmoid(logits, name='logits_activation')

            elif self.loss_function == 'rmse':
                # Plain sum of both states, clipped at zero by the ReLU.
                logits = tf.add(self.dynamic_state,
                                self.stationary_state,
                                name='logits')
                self.logits = tf.nn.relu(logits, name='logits_activation')
            else:
                raise NotImplementedError(
                    "Didn't implement the loss function yet.")

            # Slice at the last index of the leading axis.
            self.logits_last = self.logits[-1, :, :]
Beispiel #19
0
 def __init__(self):
     """Create the LSTM, load its saved parameters and move it to GPU 0.

     Weights and biases of the links ``l1_x``, ``l1_h`` and ``l6`` are
     replaced with arrays read from ``.npy`` files in ``d_name``.
     """
     self.model_lstm = LSTM(3136, 1045)
     #you should select path of saving parameters
     d_name = 'lstm_params/1epoch_params/'
     # (link attribute, weight file, bias file), loaded in this order.
     param_files = (
         ('l1_x', 'l1_x_W.npy', 'l1_x_b.npy'),
         ('l1_h', 'l1_h_W.npy', 'l1_h_b.npy'),
         ('l6', 'l6_W.npy', 'l6_b.npy'),
     )
     for link_name, weight_file, bias_file in param_files:
         link = getattr(self.model_lstm, link_name)
         link.W = np.load(d_name + weight_file)
         link.b = np.load(d_name + bias_file)
     cuda.get_device(0).use()
     self.model_lstm = self.model_lstm.to_gpu()
Beispiel #20
0
class TopicLSTM(object):
    """Topic model built from two stacked LSTMs, a linear map, a Dirichlet
    transition term and an emission layer.

    ``transform`` maps inputs to log-softmax topic scores, ``loss`` scores
    the emissions against the inputs and ``gradient`` builds the training
    objective and its gradients with respect to ``weights``.
    """

    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise(), trans_weight=1.0):
        """Create all sub-layers and store the configuration.

        The default ``noise`` object is created once, when the method is
        defined, and is shared by every instance that does not pass its own.
        """
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.linear = Linear(n_topics, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise
        self.n_topics = n_topics
        self.n_in = n_in
        self.trans_weight = trans_weight

    @property
    def weights(self):
        """Concatenated weights of forward, backward, trans and emit."""
        # NOTE(review): self.linear is used in transform() but its weights
        # are not part of this list -- confirm whether that is intended.
        return self.forward.weights + self.backward.weights + self.trans.weights + self.emit.weights

    def transform(self, X, mask=None):
        """Return log-softmax topic scores for ``X``.

        ``X`` is scanned left-to-right by ``forward``; that result is
        scanned right-to-left by ``backward`` and passed through ``linear``.
        """
        Z_f, _ = self.forward.scanl(X, mask=mask)
        Z, _ = self.backward.scanr(Z_f, mask=mask) #, activation=softmax)
        return logsoftmax(self.linear(Z))

    def loss(self, X, mask=None, flank=0, Z=None):
        """Return ``(L, C)``: cross-entropy and confusion terms for ``X``.

        ``Z`` is computed from the noised input when not supplied.  Both
        results are masked (if ``mask`` is given) and sliced to
        ``[flank:n-flank]`` along the first axis.
        """
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        E = self.emit(Z)
        L = cross_entropy(E, X)
        C = confusion(T.argmax(E,axis=-1), X, E.shape[-1])
        if mask is not None:
            # Pad the mask on the right so it broadcasts over the trailing
            # axis of L (one pad) and of C (two pads).
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        """Return ``(gW, [L_sum, C_sum])`` for one batch.

        The objective is the summed cross-entropy, minus the weighted
        transition term (when ``trans_weight > 0``), plus the sparsity
        penalty ``sparsity * Z`` (when ``sparsity > 0``).  ``gW`` holds the
        gradients of that objective with respect to ``self.weights``.
        """
        Z = self.transform(self.noise(X), mask=mask)
        n = Z.shape[0]
        # Reuse Z so the transform is not evaluated a second time.
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L) #/ self.n_in
        Tr = self.trans(Z)
        if mask is not None:
            Tr *= mask
        if self.trans_weight > 0:
            loss -= self.trans_weight*T.sum(Tr[flank:n-flank]) #/ self.n_topics
        # m is only referenced by the disabled regularizer line below.
        m = n-2*flank
        #loss += self.trans.regularizer()*m/self.n_topics
        if self.sparsity > 0:
            R = self.sparsity*Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        # Inputs not connected to the loss only produce a warning.
        gW = theano.grad(loss, self.weights, disconnected_inputs='warn')
        return gW, [L.sum(axis=[0,1]),C.sum(axis=[0,1])]
def train_lstm(train,
               test,
               model_parameters=(4, 'mean_squared_error', 'adam', 100, 1)):
    """Train an LSTM on ``train``, predict on ``test`` and print a report.

    Args:
        train: training data handed to the ``LSTM`` constructor.
        test: data handed to ``lstm_predict``.
        model_parameters: indexable sequence of
            ``(history, loss, optimizer, epochs, batch_size)``.  The default
            is an immutable tuple so the shared default object can never be
            altered between calls; lists are still accepted.
    """
    history = int(model_parameters[0])
    loss = model_parameters[1]
    optimizer = model_parameters[2]
    epochs = int(model_parameters[3])
    batch_size = int(model_parameters[4])

    lstm = LSTM(train, history, loss, optimizer)
    lstm.lstm_train(epochs, batch_size)
    real_label, predicted_label = lstm.lstm_predict(test)
    print_report('LSTM', real_label, predicted_label)
Beispiel #22
0
def trainLSTM():
    """Train the LSTM on ten observation/action file pairs.

    The files are suffixed 0, 500, ..., 4500.  All pairs are concatenated,
    their lengths printed, and the model is trained for 10 epochs.
    """
    lstm = LSTM(14, 64, 3, 1)

    X, y = [], []
    for offset in range(0, 5000, 500):
        observations_path = "data/observations_" + str(offset) + ".npy"
        actions_path = "data/actions_" + str(offset) + ".npy"
        tempX, tempY = getData(observations_path, actions_path)
        X.extend(tempX)
        y.extend(tempY)

    print(len(X), len(y))

    lstm.train(X, y, maxEpoch=10, learning_rate=0.0006, mini_batch_size=1)
Beispiel #23
0
 def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
              hidden_size, dropout_rate):
     """Create the lookup tables, output projection and both LSTMs.

     Args:
         name: model name; also the prefix of both LSTM names.
         src_vocab_size: second dimension of the source lookup table.
         trg_vocab_size: second dimension of the target lookup table and
             size of the output projection.
         embed_size: first dimension of both lookup tables, LSTM input size.
         hidden_size: LSTM hidden size, projection input size.
         dropout_rate: stored as ``dropout_rate_``.
     """
     self.name_ = name
     self.dropout_rate_ = dropout_rate

     # Lookup tables, one per vocabulary.
     src_lookup_shape = [embed_size, src_vocab_size]
     trg_lookup_shape = [embed_size, trg_vocab_size]
     self.psrc_lookup_ = Parameter(src_lookup_shape, I.XavierUniform())
     self.ptrg_lookup_ = Parameter(trg_lookup_shape, I.XavierUniform())

     # Output projection: weight uses Xavier init, bias starts at zero.
     self.pwhy_ = Parameter([trg_vocab_size, hidden_size], I.XavierUniform())
     self.pby_ = Parameter([trg_vocab_size], I.Constant(0))

     self.src_lstm_ = LSTM(name + "_src_lstm", embed_size, hidden_size)
     self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size, hidden_size)
Beispiel #24
0
def forecast_lstm(actions):
    """Print an anomaly score for every action after the first one.

    Each non-empty entry of ``actions`` is parsed as JSON and vectorized.
    The first one only seeds ``previous_action``.  For every later one the
    model forecasts from the previous action and the resulting score is
    printed.

    Args:
        actions: iterable of JSON strings; empty strings are skipped.
    """
    lstm = LSTM(CONFIG)
    model = lstm.load_model()

    vocabulary = restore_vocabulary()
    actions_scores = readScores(CONFIG)
    previous_action = None
    for action in actions:
        if action == '':
            continue

        # Compare previous action with the incoming action.
        action = json.loads(action)
        incoming_action = action_to_vector(action, vocabulary)
        # Identity test: `== None` on an array-like vector compares
        # element-wise and cannot be used as a plain condition.
        if previous_action is None:
            previous_action = incoming_action
            continue

        score = getScore(actions_scores, action['name'])

        previous_action_transformed = lstm.pretransform_dataset(
            [previous_action], reshape=True)
        incoming_action_transformed = lstm.pretransform_dataset(
            [incoming_action], reshape=True)
        predicted = lstm.forecast(model, previous_action_transformed,
                                  incoming_action_transformed)
        # Print anomaly score and set the new one now as previous action.
        # Single-argument print() behaves the same on Python 2 and 3.
        print(lstm.calculate_score(incoming_action_transformed, predicted,
                                   score))

        previous_action = incoming_action
Beispiel #25
0
 def load(name, prefix):
     """Rebuild an ``EncoderDecoder`` from files saved under ``prefix``.

     The instance is created without running ``__init__``; parameters, both
     LSTMs and the dropout rate (first line of the ``.config`` file) are
     then filled in from disk.

     Args:
         name: model name used in every file name.
         prefix: path prefix prepended to every file name.

     Returns:
         The restored ``EncoderDecoder`` instance.
     """
     base = prefix + name

     encdec = EncoderDecoder.__new__(EncoderDecoder)
     encdec.name_ = name

     encdec.psrc_lookup_ = Parameter.load(base + "_src_lookup.param")
     encdec.ptrg_lookup_ = Parameter.load(base + "_trg_lookup.param")
     encdec.pwhy_ = Parameter.load(base + "_why.param")
     encdec.pby_ = Parameter.load(base + "_by.param")

     encdec.src_lstm_ = LSTM.load(name + "_src_lstm", prefix)
     encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)

     with open(base + ".config", "r") as ifs:
         first_line = ifs.readline()
         encdec.dropout_rate_ = float(first_line)
     return encdec
    def __init__(self,
                 feature_config_path,
                 dense_feature_config_path='',
                 notes_feature_name='',
                 use_notes_model=False,
                 notes_model_params=None,
                 variational_vocab_keep_prob=1.0,
                 notes_vocab_keep_prob=1.0,
                 notes_max_length=-1,
                 notes_num_splits=0,
                 bagging_timerange=3600,
                 bagging_aggregate_older_than=3600000,
                 is_training=True):
        """Store the feature configuration and optionally build a notes LSTM.

        Args:
            feature_config_path: path to embedding config for discrete
                features.
            dense_feature_config_path: path to config file with statistics
                for continuous features.
            notes_feature_name: name of notes feature in inputs.
            use_notes_model: whether to run an LSTM on the notes.
            notes_model_params: dict with parameters for the notes LSTM; it
                must contain ``'model_dim'`` and ``'bidirectional'`` when
                ``use_notes_model`` is true.
            variational_vocab_keep_prob: vocabulary dropout rate for
                features other than notes.
            notes_vocab_keep_prob: vocabulary dropout rate for notes.
            notes_max_length: number of words to retain from notes per
                example.
            notes_num_splits: number of additional GPUs to distribute the
                notes LSTM across.
            bagging_timerange: length of timesteps to aggregate into a
                single bag (in seconds).
            bagging_aggregate_older_than: length of time before prediction
                beyond which all observations should be aggregated into a
                single bag (in seconds).
            is_training: whether model is in training or eval phase.
        """
        self._is_training = is_training

        # Feature configuration.
        self._feature_config_path = feature_config_path
        self._dense_feature_config_path = dense_feature_config_path
        self._variational_vocab_keep_prob = variational_vocab_keep_prob

        # Notes handling.
        self._notes_feature_name = notes_feature_name
        self._use_notes_model = use_notes_model
        self._notes_vocab_keep_prob = notes_vocab_keep_prob
        self._notes_max_length = notes_max_length
        self._notes_num_splits = notes_num_splits

        # Bagging.
        self._bagging_timerange = bagging_timerange
        self._bagging_aggregate_older_than = bagging_aggregate_older_than

        self._notes_model = None
        if not use_notes_model:
            return

        # The two derived attributes below only exist with a notes model.
        self._notes_model_dim = notes_model_params['model_dim']
        self._notes_bidirectional = notes_model_params['bidirectional']
        self._notes_model = LSTM(**notes_model_params)
 def forward(self, xs):
     """Run the LSTM over every time step of ``xs``.

     Args:
         xs: array unpacked as ``(N, T, D)``; step ``t`` is ``xs[:, t, :]``.

     Returns:
         Array of shape ``(N, T, H)`` with the hidden state after each
         step, where ``H`` is the first dimension of the ``Wh`` parameter.
     """
     Wx, Wh, b = self.params
     N, T, D = xs.shape
     H = Wh.shape[0]

     # Start from zeros unless state is carried over and already present.
     fresh_state = not self.stateful
     if fresh_state or self.h is None:
         self.h = np.zeros((N, H), dtype='f')
     if fresh_state or self.c is None:
         self.c = np.zeros((N, H), dtype='f')

     hs = np.empty((N, T, H), dtype='f')
     for t in range(T):
         step = LSTM(*self.params)
         self.h, self.c = step.forward(xs[:, t, :], self.h, self.c)
         hs[:, t, :] = self.h
         # NOTE(review): self.layers is only appended to here, never
         # cleared -- confirm it is reset elsewhere between calls.
         self.layers.append(step)
     return hs
Beispiel #28
0
def train_lstm():
    """Vectorize every logged action and train the LSTM on the result.

    A vocabulary is created (and saved) from ``LOG_FILE``, each row of the
    log is turned into a vector, and the model is trained on those vectors.

    Returns:
        Whatever ``lstm.train_on_dataset`` returns (the original code bound
        it to the name ``rmse`` and discarded it).
    """
    lstm = LSTM(CONFIG)

    vocabulary = create_and_save_vocabulary(LOG_FILE)

    # Single-argument print() behaves the same on Python 2 and 3.
    print("Start preprocessing data")
    actions_vectorized = [
        action_to_vector(row, vocabulary)
        for row in create_iter_generator(LOG_FILE)
    ]
    print("End preprocessing data")

    model = lstm.get_model()
    return lstm.train_on_dataset(actions_vectorized, model)
Beispiel #29
0
def main():
    """Load the MNIST train and ``t10k`` sets and train the LSTM network.

    The other networks (Lenet, Alexnet, VGG16) are left disabled below,
    as in the original script.
    """
    mnist_dir = "D:/Computer Science/Github/Mnist-tensorflow/"
    images, labels = load_mnist(mnist_dir)
    images_test, labels_test = load_mnist(mnist_dir, "t10k")

    # lenet = Lenet(images, labels, images_test, labels_test, 0.5, 100, 20000)
    # lenet.train()

    # alexnet = Alexnet(images, labels, images_test, labels_test, 0.5, 100, 300)
    # alexnet.train()

    # vgg16 = VGG16(images, labels, images_test, labels_test, 0.5, 100, 300)
    # vgg16.train()

    lstm = LSTM(
        images,
        labels,
        images_test,
        labels_test,
        keep_pb=0.5,
        batch_size=100,
        epoch_size=500,
    )
    lstm.train_network()
Beispiel #30
0
def run_service():
    """Train SVM, ARIMA and LSTM models for every stock and save their forecasts.

    For each ``(stock, symbol)`` pair from
    ``StockHelper.get_stock_symbol_mapping()`` the three models are built,
    each is asked for a 300-day forecast via ``fit_predict``, and the
    forecast is stored with ``save_predictions`` together with the latest
    timestamp found in the model's training data.
    """
    horizon_days = 300

    for stock, symbol in StockHelper.get_stock_symbol_mapping().items():
        logger.info(f"Starting training for {stock} [{symbol}] at {ctime()}")
        models = {
            "SVM": SVM(symbol, scaler=StandardScaler),
            "ARIMA": ARIMA(symbol, scaler=LogScaler),
            "LSTM": LSTM(symbol, scaler=MinMaxScaler, is_keras=True),
        }

        for model_name, model in models.items():
            logger.info(f"\tTraining {model_name} for {stock}")
            started_at = time()
            # Read the training data before fitting, as the original did.
            train_data = model.train_data

            # Only the index of this empty Series is used: it supplies the
            # upcoming trading days that the forecast values are aligned to.
            future_days = Series(index=get_next_n_trading_days(horizon_days))

            forecast = model.fit_predict(horizon_days)
            # fit_predict may return fewer values than requested, so the
            # index is trimmed to the forecast length.
            forecast = Series(data=forecast, index=future_days.index[: len(forecast)])

            last_train_day = train_data.index.max().to_pydatetime()
            save_predictions(forecast, type(model).__name__, symbol, last_train_day)
            logger.info(f"\tTrained {model_name} for {stock} in {time() - started_at:.3f} seconds")

        logger.info(f"Finished training for {stock} [{symbol}] at {ctime()}")
Beispiel #31
0
def load_model(modelConfig, dataConfig, data):
    """Restore a saved LSTM and run a single prediction pass over ``data``.

    Args:
        modelConfig: mapping read for ``'NumCores'`` (optional; number of
            TensorFlow inter-/intra-op threads) and ``'InputDirectory'``
            (path passed to ``saver.restore``). Also forwarded to ``LSTM``.
        dataConfig: forwarded unchanged to the ``LSTM`` constructor.
        data: object exposing ``prepBatches(batch_size, num_steps)``; passed
            on to ``run_epoch``.

    Returns:
        The first element of the 4-tuple returned by ``run_epoch``.

    Warns:
        UserWarning: when ``'NumCores'`` is missing from ``modelConfig``; the
            TensorFlow default thread configuration is used in that case.
    """
    import warnings

    # Set number of cores for TensorFlow to use
    try:
        num_cores = int(modelConfig['NumCores'])
    except KeyError:
        # The original *raised* UserWarning here, which aborted the call and
        # made the fallback below unreachable. Emit a real warning instead so
        # the promised default configuration is actually used.
        warnings.warn(
            "Number of cores to use not specified! Setting to TensorFlow default.",
            UserWarning,
        )
        tf_config = tf.ConfigProto()
    else:
        tf_config = tf.ConfigProto(
            inter_op_parallelism_threads=num_cores,
            intra_op_parallelism_threads=num_cores)

    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:

        with tf.variable_scope('Model', reuse=None):
            m = LSTM(False, modelConfig, dataConfig)

        saver = tf.train.Saver()
        saver.restore(session, modelConfig['InputDirectory'])
        # Single-argument parenthesised print works on Python 2 and 3.
        print('Model successfuly restored!')

        data.prepBatches(m.batch_size, m.num_steps)
        # tf.no_op() is passed where run_epoch expects an op to evaluate.
        ypred, _, _, _ = run_epoch(session, m, data, tf.no_op())

    return ypred
Beispiel #32
0
    def build(self):
        """Assemble the recurrent model and compile its train/test functions.

        Builds a GRU (when ``self.cell == 'gru'``) or otherwise an LSTM hidden
        layer, feeds its activation into an ``H_Softmax`` output layer, and
        collects ``self.E`` plus both layers' parameters in ``self.params``.
        The output layer's activation is used as the cost.

        Sets:
            self.params: list of trainable parameters.
            self.train: Theano function taking the data inputs plus a scalar
                learning rate; returns the cost and applies ``sgd`` updates
                with gradients clipped to [-10, 10]. ``is_train`` is fixed to 1.
            self.test: Theano function taking the data inputs only; returns
                the cost with ``is_train`` fixed to 0.
        """
        # Single-argument parenthesised prints work on Python 2 and 3.
        print('\t building rnn cell...')
        # Both cell types take exactly the same constructor arguments.
        cell_cls = GRU if self.cell == 'gru' else LSTM
        hidden_layer = cell_cls(self.rng,
                                self.n_input, self.n_hidden, self.n_batch,
                                self.x, self.E, self.x_mask,
                                self.is_train, self.p)
        print('\t building softmax output layer...')
        softmax_shape = (self.n_hidden, self.n_output)
        output_layer = H_Softmax(softmax_shape,
                                 hidden_layer.activation,
                                 self.y_node, self.y_choice, self.y_bit_mask, self.y_mask)
        self.params = [self.E, ]
        self.params += hidden_layer.params
        self.params += output_layer.params

        cost = output_layer.activation
        lr = T.scalar("lr")
        gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
        updates = sgd(self.params, gparams, lr)

        data_inputs = [self.x, self.x_mask, self.y_node, self.y_choice,
                       self.y_bit_mask, self.y_mask, self.n_batch]

        # np.asarray(value, dtype=...) is the documented replacement for
        # np.cast[dtype](value), which was removed in NumPy 2.0.
        self.train = theano.function(inputs=data_inputs + [lr],
                                     outputs=cost,
                                     updates=updates,
                                     givens={self.is_train: np.asarray(1, dtype='int32')})

        self.test = theano.function(inputs=list(data_inputs),
                                    outputs=cost,
                                    givens={self.is_train: np.asarray(0, dtype='int32')})
        '''
Beispiel #33
0
class Predictor(object):
    """Trajectory predictor backed by an LSTM restored from ``save_lstm``."""

    def __init__(self, obs_length=5, pred_length=3):
        """Rebuild the model from its pickled arguments and restore its weights.

        Args:
            obs_length: number of leading path points treated as observed.
            pred_length: number of points requested from the model.
        """
        save_dir = 'save_lstm'
        self.obs_length = obs_length
        self.pred_length = pred_length

        # The constructor arguments used at training time were pickled next
        # to the checkpoints.
        # NOTE(review): pickle.load executes arbitrary code; only use with a
        # trusted save directory.
        with open(os.path.join(save_dir, 'config.pkl'), 'rb') as config_file:
            saved_args = pickle.load(config_file)

        self.model = LSTM(saved_args, True)
        self.sess = tf.InteractiveSession()
        saver = tf.train.Saver()

        # Locate the checkpoint recorded for the save directory and load it.
        ckpt = tf.train.get_checkpoint_state(save_dir)
        print('loading model: ', ckpt.model_checkpoint_path)
        saver.restore(self.sess, ckpt.model_checkpoint_path)

    def predict(self, path, full=False):
        """Sample a trajectory from the first ``obs_length`` points of ``path``.

        Returns:
            ``(predicted_traj, mu, var)`` when ``full`` is truthy, otherwise
            only ``predicted_traj``.
        """
        observed = path[:self.obs_length]
        sampled = self.model.sample(self.sess,
                                    observed,
                                    num=self.pred_length,
                                    full=full)
        predicted_traj, mu, var = sampled
        if not full:
            return predicted_traj
        return predicted_traj, mu, var
Beispiel #34
0
def main(nn_type, data_type):
    """Train the network selected by ``nn_type`` on the data selected by ``data_type``.

    Args:
        nn_type: one of ``"lstm"``, ``"mem_net"``, ``"dynam_net"`` or
            ``"dynam_net_theano"``. Any other value does nothing beyond
            printing the start banner.
        data_type: dataset selector. It is passed to ``BabiProcessor`` for
            ``"lstm"`` / ``"dynam_net"``. For ``"mem_net"`` only ``"babi"``,
            ``"wiki_qa"`` and ``"cnn"`` are recognised; other values are
            silently ignored.
    """
    print(" Starting... ")

    if nn_type == "lstm":
        proc = BabiProcessor(data_type)
        X_train, y_train, mask_train, X_test, y_test, mask_test, input_size, max_seq_len, idx2word = proc.process()
        lstm = LSTM(X_train, y_train, mask_train, X_test, y_test, mask_test, idx2word)
        network, l_mask, l_in = lstm.build_model(input_size, max_seq_len)
        lstm.optimize(network, l_mask, l_in)

    elif nn_type == "mem_net":
        # data_type -> dataset name understood by MemNet.run
        mem_net_datasets = {
            "babi": "babi",
            "wiki_qa": "wiki_qa",
            "cnn": "cnn_qa",
        }
        dataset = mem_net_datasets.get(data_type)
        if dataset is not None:
            mn = MemNet()
            mn.run(dataset)

    elif nn_type == "dynam_net":
        proc = BabiProcessor(data_type, "dynam_net")
        X_train, Q_train, Y_train, mask_train, X_test, Q_test, Y_test, mask_test, input_size, max_seqlen, idx2word, max_queslen = proc.process()
        dn = DynamicMemNet(X_train, Q_train, Y_train, mask_train, X_test, Q_test, Y_test, mask_test, input_size, max_seqlen, idx2word, max_queslen)
        dn.build()
        dn.train()

    elif nn_type == "dynam_net_theano":
        dmn_t = DMN_full_babi()
        dmn_t.train()
        print("Finished DMN Theano")
Beispiel #35
0
class TopicLSTM(object):
    """Two stacked LSTM passes producing per-position topic log-probabilities.

    A left-to-right LSTM (``scanl``) feeds a right-to-left LSTM (``scanr``)
    whose output, passed through ``logsoftmax``, is the topic representation
    ``Z``. ``Z`` drives a ``DirichletTransition`` and an ``Emmission`` model
    whose combined output is scored against the input.
    """

    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise()):
        """
        Args:
            n_in: input size of the first LSTM and output size of the emission model.
            units: hidden size of the first LSTM / input size of the second.
            n_topics: output size of the second LSTM (number of topics).
            sparsity: weight of the extra penalty on ``Z`` added in
                ``gradient``; no penalty is added unless it is > 0.
            noise: callable applied to ``X`` before ``transform``.
        """
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise

    @property
    def weights(self):
        """All trainable weights, concatenated in a fixed component order."""
        return self.forward.weights + self.backward.weights + self.trans.weights + self.emit.weights

    def transform(self, X, mask=None):
        """Map ``X`` to topic log-probabilities ``Z``.

        ``mask`` is applied to the first (scanl) pass only; the second
        (scanr) pass is always run unmasked.
        """
        Z_f = self.forward.scanl(X, mask=mask)
        Z = self.backward.scanr(Z_f, mask=None, activation=logsoftmax)
        return Z

    def loss(self, X, mask=None, flank=0, Z=None):
        """Return the cross-entropy ``L`` and confusion counts ``C``.

        Args:
            X: input sequence(s).
            mask: optional mask multiplied into both outputs.
            flank: number of leading and trailing positions (first axis) to
                drop from the returned values.
            Z: precomputed ``transform`` output; computed from the noised
                input when omitted.
        """
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        Tr = self.trans(Z)
        E = self.emit(Z)
        L = cross_entropy(T.shape_padright(Tr) + E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n-flank], C[flank:n-flank]

    def gradient(self, X, mask=None, flank=0):
        """Return ``(gW, [L, C])``: gradients of the summed loss and the loss terms.

        When ``self.sparsity > 0`` the masked, flank-trimmed sum of
        ``self.sparsity * Z`` is added to the loss before differentiating.
        """
        Z = self.transform(self.noise(X), mask=mask)
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)
        n = Z.shape[0]
        # The attribute is `sparsity`; the original read the misspelt
        # `self.sparcity`, which raised AttributeError on every call.
        if self.sparsity > 0:
            R = self.sparsity*Z
            if mask is not None:
                # Mask the penalty the same way `loss` masks L. The original
                # multiplied R by a padded copy of itself instead of the mask.
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n-flank])
        gW = theano.grad(loss, self.weights)
        return gW, [L, C]
 def __init__(self):
     """Create the LSTM, load its saved weights from disk and move it to GPU 0.

     The ``W`` and ``b`` arrays of layers ``l1_x``, ``l1_h`` and ``l6`` are
     read from ``<layer>_<param>.npy`` files in the parameter directory.
     """
     self.model_lstm = LSTM(3136, 1045)
     # Directory holding the saved parameters; adjust to your own path.
     params_dir = 'lstm_params/1epoch_params/'
     for layer_name in ('l1_x', 'l1_h', 'l6'):
         layer = getattr(self.model_lstm, layer_name)
         for param_name in ('W', 'b'):
             file_name = layer_name + '_' + param_name + '.npy'
             setattr(layer, param_name, np.load(params_dir + file_name))
     cuda.get_device(0).use()
     self.model_lstm = self.model_lstm.to_gpu()
# Split the zipped (x, y, mask) training triples into train and validation sets.
train_data, valid_data = create_valid(zip(train_x, train_y, train_mask))
num_classes = len(label2idx)
embedding = load_embedding(FLAGS.embedding_size, filename=FLAGS.embedding_file)
test_x, test_y, test_mask = load_data(FLAGS.test_file, word2idx, label2idx, FLAGS.sequence_len)
logging.info("load test data finish")
#----------------------------------- load data end ----------------------

#----------------------------------- execute train ---------------------------------------
with tf.Graph().as_default():
    with tf.device("/cpu:0"):
        # Session configuration is taken entirely from command-line flags.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=FLAGS.gpu_options)
        session_conf = tf.ConfigProto(allow_soft_placement=FLAGS.allow_soft_placement, log_device_placement=FLAGS.log_device_placement, gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            # Uniform initialisation in [-init_scale, init_scale] for every variable in the scope.
            initializer = tf.random_uniform_initializer(-1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
            # Training model: creates the variables (reuse=None) and is the only one given dropout.
            with tf.variable_scope("model", reuse = None, initializer = initializer):
                model = LSTM(FLAGS.batch_size, FLAGS.sequence_len, embedding, FLAGS.embedding_size, FLAGS.attention_dim, FLAGS.rnn_size, FLAGS.num_rnn_layers, num_classes, FLAGS.max_grad_norm, dropout = FLAGS.dropout, is_training=True)

            # Validation and test models reuse the variables created above (reuse=True).
            with tf.variable_scope("model", reuse = True, initializer = initializer):
                valid_model = LSTM(FLAGS.batch_size, FLAGS.sequence_len, embedding, FLAGS.embedding_size, FLAGS.attention_dim, FLAGS.rnn_size, FLAGS.num_rnn_layers, num_classes, FLAGS.max_grad_norm, is_training=False)
                test_model = LSTM(FLAGS.batch_size, FLAGS.sequence_len, embedding, FLAGS.embedding_size, FLAGS.attention_dim, FLAGS.rnn_size, FLAGS.num_rnn_layers, num_classes, FLAGS.max_grad_norm, is_training=False)

            # Summary writer for the training run, under <out_dir>/summaries/train.
            # NOTE(review): tf.train.SummaryWriter and tf.all_variables are
            # pre-1.0 TensorFlow names -- confirm the pinned TF version.
            train_summary_dir = os.path.join(FLAGS.out_dir,"summaries","train")
            train_summary_writer =  tf.train.SummaryWriter(train_summary_dir,sess.graph)

            # Checkpoints go to <out_dir>/checkpoints with the file prefix "model";
            # the directory is created on first use.
            checkpoint_dir = os.path.abspath(os.path.join(FLAGS.out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())
Beispiel #38
0
def test_LSTM():
    """Gradient-check ``LSTM.backward`` against central finite differences.

    A random input ``X`` is pushed through the unit and the scalar loss is
    ``sum(output * wrand)`` for a fixed random ``wrand``, so the gradient of
    the loss with respect to the output is ``wrand`` itself. Every element of
    ``X`` and of the weights ``W`` is then perturbed by +/- ``delta`` and the
    numerical slope is compared with the back-propagated gradient. A summary
    is printed; nothing is returned or raised.

    Assumes ``get_weights()`` / ``get_grads()`` return NumPy arrays that
    ``set_weights`` accepts (``.size``, ``.flat`` and ``.shape`` are used) --
    confirm against the LSTM class.
    """
    T = 5
    batch_size = 2
    nstates = 5
    input_size = 4

    unit = LSTM(input_size, nstates)

    W = unit.get_weights()

    X = np.random.randn(T, input_size, batch_size)

    unit.forget()
    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()
    unit.forget()

    def fwd():
        # X and W are perturbed in place by the loop below, so re-install W
        # and recompute the loss from scratch each time.
        unit.set_weights(W)
        h = unit.forward(X)
        unit.forget()
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    checks = [('X', X, dX), ('W', W, dW)]

    error_count = 0
    for name, values, dvalues in checks:
        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_plus = fwd()
            values.flat[i] = actual - delta
            loss_minus = fwd()
            values.flat[i] = actual
            backpropagated_gradient = dvalues.flat[i]
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)

            if abs(numerical_gradient) < 1e-7 and abs(backpropagated_gradient) < 1e-7:
                # Both gradients are numerically zero; a relative error
                # would be meaningless here.
                error = 0
            else:
                error = abs(backpropagated_gradient - numerical_gradient) / abs(numerical_gradient + backpropagated_gradient)

            if error > error_threshold:
                # Each message is one pre-formatted string so the output is
                # identical under Python 2 and Python 3.
                print('FAILURE!!!\n')
                print('\tparameter:  {} \tindex:  {}'.format(name, np.unravel_index(i, values.shape)))
                print('\tvalues:  {}'.format(actual))
                print('\tbackpropagated_gradient:  {}'.format(backpropagated_gradient))
                print('\tnumerical_gradient {}'.format(numerical_gradient))
                print('\terror:  {}'.format(error))
                print('\n\n')

                error_count += 1

    if error_count == 0:
        print('LSTM Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))
Beispiel #39
0
# Last iteration index for this run: nIter iterations starting at idxBegin.
idxEnd   = idxBegin + nIter

# Filenames. All three artefacts share one base name that encodes the video id
# and the batch size, sequence length, learning rate and hidden-unit count.
modelBaseName = "lstm-model--id_{0}-batch_{1}-seq_{2}-lr_{3}-nh_{4}".format(youTubeId, batchSize, sequenceSeconds, learningRate, hiddenUnits)
modelFileName = modelBaseName + ".pkl"
graphFileName = modelBaseName + ".png"
soundFileName = modelBaseName + ".wav"

# Bookkeeping initialised before the run; these names are used by code past
# the end of this excerpt.
vals = []
error = np.array([0])
minError = np.inf
idx = 0
scaling = 0

# Create the LSTM; miniBatches is passed as both the first and third argument.
# NOTE(review): presumably (input size, hidden units, output size) -- confirm
# against the LSTM constructor.
lstm = LSTM(miniBatches, hiddenUnits, miniBatches)

# Retrieve the data stream and wrap it in a Window over the raw audio stream.
print("retrieving data...")
data = YouTubeAudio(youTubeId)
stream = data.get_example_stream()
data_stream = Window(stride, sequenceSize, sequenceSize, True, stream)

# switch to configure training or audio generation
if mode == "train":

	# Echo the run configuration before training starts.
	print("training begin...")
	print("Input Size:", batchSize)
	print("minibatches:", miniBatches) 
	print("stride:", stride)
	print("hidden units:", hiddenUnits)
Beispiel #40
0
def main():
    """Train the LSTM on generated data and log validation results each epoch.

    Generates ``n_data`` training samples and ``n_valid_data`` validation
    samples with ``generate_data(..., tricky=True)``, builds the training
    graph, then runs ``n_epochs`` full-batch ``train_step`` calls from an
    all-zero initial state. After every step the training loss, the
    validation loss and each validation target/estimate pair are logged.
    """
    n_epochs = 1000
    n_data = 1000
    n_valid_data = 20
    # Generate some data.
    data_x, data_y = generate_data(n_data, tricky=True)
    logging.info("First 10 data samples:")
    # zip() is an iterator on Python 3, so materialise it before slicing.
    logging.info(list(zip(*data_x))[:10])
    logging.info(data_y[:10])

    validation_data_x, validation_data_y = generate_data(n_valid_data,
                                                         tricky=True)

    lstm = LSTM(
        learning_rate=0.1
    )
    lstm.build_train()

    # All-zero initial state, one row per sample.
    s0 = np.zeros((n_data, lstm.n_cells, ), dtype=np.float32)
    s0_valid = np.zeros((n_valid_data, lstm.n_cells, ),
                        dtype=np.float32)
    for e in range(n_epochs):
        (loss, ) = lstm.train_step(s0, data_x, data_y)

        validation_data_est, validation_loss = lstm.validation_loss(
                s0_valid, validation_data_x, validation_data_y)
        logging.info("Epoch #%d: loss(%.5f) valid_loss(%.5f)",
                     e, loss, validation_loss)
        res_seq = enumerate(zip(validation_data_est[0], validation_data_y))
        for i, (y_est, y_t) in res_seq:
            # "lbl" is the target and "clf" the estimate; the original passed
            # them in the opposite order, mislabelling both columns.
            logging.info("%d: lbl(%.2f) clf(%.2f)", i, y_t, y_est)
Beispiel #41
0
        # The validation reader shares the word/tag dictionaries of the
        # training reader, so both splits are codified with the same mapping.
        reader_valid = Reader(valid_md)
        reader_valid.word_dict = reader.word_dict
        reader_valid.tag_dict = reader.tag_dict
        reader_valid.codify_sentences()

        # One codified word sequence and one codified tag sequence per sentence.
        # NOTE(review): `n` is defined outside this excerpt; presumably an
        # array constructor -- confirm.
        codified_sentences_valid = [n([t.codified_word for t in s]) for s in reader_valid.sentences]
        codified_tags_valid = [n([t.codified_tag for t in s]) for s in reader_valid.sentences]

    # Symbolic integer vectors for the inputs, the targets and a mask.
    x = T.ivector('x')
    y = T.ivector('y')
    mask  = T.ivector('mask')

    # Embedding layer over the word ids; the size num_words+1 leaves room for
    # one id beyond the dictionary.
    emb = Embedding(x, args.num_features, num_words+1)
    # The LSTM consumes either the dropped-out embeddings or the raw ones.
    if args.dropout:
        dropout = Dropout(emb.output, args.num_features, args.dropout)
        lstm = LSTM(dropout.output, args.l2, args.hidden, num_words + 1, num_tags, args.num_features)
    else:
        lstm = LSTM(emb.output, args.l2, args.hidden, num_words + 1, num_tags, args.num_features)

    # Optionally resume from previously saved parameters.
    if args.load_models:
        print('... Loaded Models')
        emb.load(directory_model, varlist)
        lstm.load(directory_model, varlist)

    # NOTE(review): presumably (tagging error, negative log-likelihood) --
    # confirm against LSTM.errors.
    te, nll = lstm.errors(y)
    params = emb.params + lstm.params
    params_helper = emb.params_helper + lstm.params_helper

    # NOTE(review): the meaning of rho is not visible in this excerpt.
    rho = 10
    lr = np.float32(float(args.learning_rate))
Beispiel #42
0
 def load(state):
     """Rebuild a ``CharacterGenerator`` from a saved ``state`` mapping.

     ``state['lstm']`` and ``state['output']`` are handed to ``LSTM.load``
     and ``Softmax.load`` respectively; the two restored layers are then
     wrapped in a new ``CharacterGenerator``.
     """
     restored_lstm = LSTM.load(state['lstm'])
     restored_output = Softmax.load(state['output'])
     return CharacterGenerator(restored_lstm, restored_output)
Beispiel #43
0
# Remaining hyper-parameter options; parser and earlier options are defined
# above this excerpt.
parser.add_argument("--decay_rate", type=float, default=0.95)
parser.add_argument("--dropout", type=float, default=0.0)
parser.add_argument("--epochs", type=int, default=1)
parser.add_argument("--grad_clip", type=int, default=5)
parser.add_argument("--init_from", type=str, default="")

args = parser.parse_args()

train_data, val_data = load_data()

# Short local aliases for the parsed options.
n_epochs = args.epochs
n_units = args.lstm_size
grad_clip = args.grad_clip

# Initialize the LSTM
model = LSTM(3136, n_units)
# A negative --gpu value keeps the model on the CPU.
if args.gpu >= 0:
    cuda.get_device(args.gpu).use()
    model.to_gpu()

# Set up the learning algorithm
optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8)
optimizer.setup(model.collect_parameters())

# Dataset sizes plus epoch and timing bookkeeping for the loop that follows.
whole_len = len(train_data)
whole_val_len = len(val_data)
epoch = 0
start_at = time.time()
cur_at = start_at
end_time = 0
state = make_initial_state(n_units)
  def __init__(self, imgX, imgY, input = None, n_hidden_enc = 100, n_hidden_dec = 100, n_z=100, n_steps = 8, batch_size = 100, rng = rng):
    """Build the symbolic graph of a recurrent read/write auto-encoder.

    For ``n_steps`` iterations an encoder LSTM reads the input (and the
    current reconstruction error), a latent ``z`` is sampled from the encoder
    state, a decoder LSTM consumes ``z`` and a write head adds its output to
    a running canvas. A second, encoder-free pass feeds the same noise
    sequence straight into the decoder to generate samples.

    Args:
        imgX, imgY: image dimensions; the model works on the flattened
            ``imgX*imgY`` vector.
        input: symbolic matrix with one flattened image per row. Defaults to
            a zero-filled shared variable of shape ``(batch_size, imgX*imgY)``.
        n_hidden_enc: encoder LSTM hidden size.
        n_hidden_dec: decoder LSTM hidden size.
        n_z: size of the latent variable.
        n_steps: number of read/write iterations.
        batch_size: number of rows in ``input``; also normalises the loss.
        rng: random stream exposing ``normal(shape)``.

    Sets (among others): ``params``, ``cT`` (final canvas), ``lossX``,
    ``lossZ``, ``loss``, ``test``, ``generated_x``, ``generated_x2``,
    ``mean`` and ``var``.
    """
    # Identity check: `== None` is an equality test, which does not reliably
    # yield a plain bool for array-like or symbolic arguments.
    if input is None:
      input = theano.shared(numpy.zeros((batch_size,imgX*imgY)))

    # Learned initial canvas, shared by every row of the batch.
    self.c0 = theano.shared(name='c0',
                                value=numpy.random.uniform(-1.0, 1.0,
                                (imgX*imgY))
                                .astype(theano.config.floatX))

    # The encoder sees the read-head output concatenated with the previous
    # decoder hidden state.
    self.rnn_enc = LSTM(n_hidden_dec+2*imgX*imgY,n_hidden_enc)
    self.rnn_dec = LSTM(n_z,n_hidden_dec)
    self.Z = RandomVariable(rng,n_in=n_hidden_enc,n_out=n_z)
    self.readHead = ReadHead(n_hidden_enc)
    self.writeHead = WriteHead(imgX,imgY,n_hidden_dec)
    self.X = RandomVariable(rng,n_in=imgX*imgY,n_out=imgX*imgY,sigmoid_mean=True)
    # One noise draw per step, shared by the auto-encoding and generation passes.
    self.randSeq = rng.normal((n_steps,batch_size,n_z))

    self.params = [self.c0] + self.readHead.params + self.rnn_enc.params + self.Z.params + self.rnn_dec.params + self.X.params + self.writeHead.params

    #turns vector into n_batches x vector_length matrix
    #concatenate operation won't broadcast so we add a 0 matrix with
    #the correct number of rows
    def vec2Matrix(v):
      t = v.dimshuffle(['x',0])
      t = T.dot(input.dimshuffle([1,0])[0].dimshuffle([0,'x']),t)
      return v + T.zeros_like(t)

    def autoEncode(epsilon,ctm1,stm1_enc,htm1_enc,stm1_dec,htm1_dec,ztm1,x):
      # One read -> encode -> sample -> decode -> write step.
      # htm1_enc and ztm1 are Python lists accumulating every step's value.
      x_err = x - T.nnet.sigmoid(ctm1)
      rt = self.readHead.read(x,x_err,htm1_dec)
      [s_t_enc,h_t_enc] = self.rnn_enc.recurrence(
                T.concatenate([rt,htm1_dec],axis=1),stm1_enc,htm1_enc[-1])
      z_t = self.Z.conditional_sample(h_t_enc,epsilon)
      [s_t_dec,h_t_dec] = self.rnn_dec.recurrence(z_t,stm1_dec,htm1_dec)
      c_t = ctm1 + self.writeHead.write(h_t_dec)
      # Hand the *new* decoder hidden state to the next step. The original
      # returned htm1_dec here, so the decoder hidden state never advanced
      # during auto-encoding (unlike generate() below).
      return [c_t,s_t_enc,htm1_enc+[h_t_enc],s_t_dec,h_t_dec,ztm1+[z_t]]

    c_t,s_t_enc,h_t_enc,s_t_dec,h_t_dec,z_t = [vec2Matrix(self.c0),vec2Matrix(self.rnn_enc.s0),
          [vec2Matrix(self.rnn_enc.h0)],vec2Matrix(self.rnn_dec.s0),
          vec2Matrix(self.rnn_dec.h0),[]]

    #would like to use scan here but runs into errors with computations involving random variables
    #also takes much longer to find gradient graph

    for i in range(n_steps):
      c_t,s_t_enc,h_t_enc,s_t_dec,h_t_dec,z_t = autoEncode(self.randSeq[i],c_t,s_t_enc,h_t_enc,s_t_dec,h_t_dec,z_t,input)

    def generate(epsilon,ctm1,stm1_dec,htm1_dec):
      # Decoder-only step: the noise is used directly as the latent input.
      [s_t_dec,h_t_dec] = self.rnn_dec.recurrence(epsilon,stm1_dec,htm1_dec)
      c_t = ctm1 + self.writeHead.write(h_t_dec)
      return [c_t,s_t_dec,h_t_dec]

    c_t2,s_t_dec2,h_t_dec2 = [vec2Matrix(self.c0),vec2Matrix(self.rnn_dec.s0),
          vec2Matrix(self.rnn_dec.h0)]

    for i in range(n_steps):
      c_t2,s_t_dec2,h_t_dec2 = generate(self.randSeq[i],c_t2,s_t_dec2,h_t_dec2)


    self.h_t_enc = T.stacklists(h_t_enc)
    self.cT = c_t
    # Reconstruction term plus latent term, averaged over the batch.
    self.lossX = T.sum(-self.X.log_conditional_prob(input,self.cT))
    self.lossZ = T.sum(self.Z.latent_loss(self.h_t_enc))
    self.loss = (self.lossX+self.lossZ)/batch_size
    self.test = self.loss
    # generated_x samples from the auto-encoded canvas, generated_x2 from the
    # decoder-only canvas.
    self.generated_x = self.X.conditional_sample(self.cT,rng.normal((batch_size,imgX*imgY)))
    self.generated_x2 = self.X.conditional_sample(c_t2,rng.normal((batch_size,imgX*imgY)))
    self.mean = T.dot(self.cT,self.X.w_mean)
    self.var = T.exp(T.dot(self.cT,self.X.w_var))
Beispiel #45
0
    try:
        with open(os.path.join(directory_model, 'reader.pkl'), 'rb') as f:
            reader = pickle.load(f)
    except:
        md = Metadata(args, args.filename, args.fixed_embeddings or args.learn_embeddings)
        reader = Reader(md, minimum_occurrence=2)

    num_tags = len(reader.tag_dict)
    num_words = len(reader.word_dict)
    print('... loading models')

    x = T.ivector('x')

    emb = Embedding(x, args.num_features, num_words+1)
    lstm = LSTM(emb.output, args.l2, args.hidden, num_words + 1, num_tags, args.num_features)

    emb.load(directory_model, varlist)
    lstm.load(directory_model, varlist)

    classify = th.function(
            inputs = [x],
            outputs = [lstm.y_pred, lstm.p_y_given_x])

    print('#words: {}, #tags : {}, #hidden : {}, embedding size: {} '.format(\
            len(reader.word_dict), len(reader.tag_dict), args.hidden, args.num_features))

    print('>>> READY')
    while True:
        sent = input()
        coded = reader.codify_string(sent)