def main():
    argv = sys.argv
    if len(argv) != 4:
        print('Usage: ' + argv[0] + ' model_name dataset original_dataset')
        sys.exit(0)
    model_name = argv[1]
    data = datautils.load(argv[2])
    original_data = datautils.load(argv[3])
    normalized, mean, std = datautils.normalize(data)
    (eval_sequences, cuts_indexes) = split_evaluation_sequences(normalized)

    """eval"""
    model = LSTM()
    clean = np.empty(0)
    for head, tail in eval_sequences:
        if len(clean) == 0:
            y_init = 0
        else:
            y_init = clean[-1]
        head_diff = datautils.differentiate(head, y_init)
        projection = model.evaluate(model_name, head_diff, tail)
        head = datautils.undifferentiate(head_diff, y_init)
        projection = datautils.undifferentiate(projection, head[-1])
        clean = np.concatenate((clean, head, projection))

    """plot"""
    clean_denorm = datautils.denormalize(clean, mean, std)
    utils.plot_multiple([original_data, clean_denorm], [0, 0],
                        vertical_lines=cuts_indexes)
def __init__(self,
             input_size: int = INPUT_SIZE,
             output_size: int = OUTPUT_SIZE,
             hidden_size: int = HIDDEN_SIZE,
             embed_size: int = EMBED_SIZE,
             lr: float = LEARNING_RATE,
             clip_grad: float = CLIP_GRAD,
             init_range: float = INIT_RANGE):
    input_layers = [
        Embedding(input_size, embed_size, init_range),
        LSTM(embed_size, hidden_size, init_range)
    ]
    output_layers = [
        Embedding(output_size, embed_size, init_range),
        LSTM(embed_size, hidden_size, init_range, previous=input_layers[1]),
        Softmax(hidden_size, output_size, init_range)
    ]
    self.input_layers, self.output_layers = input_layers, output_layers
    self.hidden_size = hidden_size
    self.embed_size = embed_size
    self.input_size = input_size
    self.output_size = output_size
    self.lr = lr
    self.clip_grad = clip_grad
def __init__(self, input_dim, hidden_dim, output_dim, layers=1,
             bidirectional=False, layernorm=False):
    super().__init__()
    self.name = 'lstm'
    self.input_dim = input_dim
    self.hidden_dim = hidden_dim
    self.output_dim = output_dim
    self.layers = layers
    self.bidirectional = bidirectional
    self.layernorm = layernorm
    self.lstm = LSTM(input_dim=input_dim, hidden_dim=hidden_dim, layers=layers,
                     bidirectional=bidirectional, layernorm=layernorm)
    if self.bidirectional:
        self.fc = nn.Linear(2 * hidden_dim, output_dim)
    else:
        self.fc = nn.Linear(hidden_dim, output_dim)
def create_model(self):
    self.log('Creating model')
    self.log('vocab size : ' + str(len(self.vocab_to_ints)))
    self.model = LSTM(input_units=self.maxlen,
                      hidden_units=self.hidden_dim,
                      vocab_size=len(self.vocab_to_ints) + 1,
                      embedding_dim=self.embedding_size)  # .to(device)
def courseratest():
    np.random.seed(1)
    x_dim = 3
    n_examples = 10
    time_steps = 7
    hidden_dim = 5
    da = np.random.randn(5, 10, 4)
    x = np.ones((time_steps, n_examples, x_dim))
    for i in range(time_steps):
        for j in range(n_examples):
            for k in range(x_dim):
                x[i, j, k] = np.random.randn()
    from functions import xavier_init
    net = LSTM(hidden_dim, x_dim)
    states, caches, preds, ys = net.forward(
        x, np.zeros((time_steps, n_examples, 1)))
    # print(states[-1]['z'])
    # print(states[-1]['c_out'])
    # print(states[-1]['f'])
    # print(states[-1]['u'])
    # print(states[-1]['o'])
    da_next = np.zeros_like(da[:, :, 0])
    dc_next = np.zeros_like(states[0]['c'])
    grads = net.cell.init_grads()
    for t in reversed(range(4)):
        da_next, dc_next, grad_adds = net.cell.backward(
            states[t], da[:, :, t] + da_next, dc_next)
        for gate in ['c', 'u', 'o', 'f']:
            grads[gate]['w'] += grad_adds[gate]['w']
            grads[gate]['b'] += grad_adds[gate]['b']
    print(grad_adds['f']['b'])
def compareFixed():
    t = Tasks()
    x_test, y_test = t.sequence_type_1(100)
    add_params, mul_params = (torch.load('program_memory/add.pt'),
                              torch.load('program_memory/mul.pt'))
    hnm = HNM(10, 20, add_params, mul_params)
    hnm.load_state_dict(torch.load("learned_params/hnm_arch_2.pt"))
    ntm = NTM(10, 20)
    ntm.load_state_dict(torch.load("learned_params/ntm.pt"))
    lstm = LSTM(14, 256, 325, 1)
    lstm.load_state_dict(torch.load("learned_params/lstm.pt"))
    hnm_diff, lstm_diff, ntm_diff = 0, 0, 0
    for i in range(len(x_test)):
        hnm_out = hnm.recurrent_forward(x_test[i:i + 1])
        ntm_out = ntm.recurrent_forward(x_test[i:i + 1])
        lstm_out = lstm.recurrent_forward(x_test[i:i + 1])
        answer = np.argmax(y_test[i:i + 1].detach().numpy())
        hnm_diff += abs(answer - np.argmax(hnm_out.detach().numpy()))
        ntm_diff += abs(answer - np.argmax(ntm_out.detach().numpy()))
        lstm_diff += abs(answer - np.argmax(lstm_out.detach().numpy()))
    print(hnm_diff / len(y_test), ntm_diff / len(y_test),
          lstm_diff / len(y_test))
def __init__(self, is_training=False):
    def log(message):
        timestamp = datetime.datetime.fromtimestamp(
            time.time()).strftime('%Y-%m-%d %H:%M:%S')
        print(timestamp + ": " + message)

    log("Checking for data sets, downloading if needed...")
    data.check_all_unzip()
    log("Initializing preproc...")
    self.__preproc = Preprocessor(evidence_length=EVIDENCE_LENGTH,
                                  hypothesis_length=HYPOTHESIS_LENGTH,
                                  vector_size=VECTOR_SIZE)
    log("Processing GloVe Vector Data...")
    self.__preproc.setup_word_map(file=datasets.glove_vectors_840B_300d)
    self.__df_list = None
    self.__c_scores = None
    if is_training:
        log("Updating data scores for training...")
        self.__df_list, self.__c_scores = self.__preproc.update_data_scores(
            file=datasets.snli_full_dataset_file)
    log("Initializing LSTM...")
    self.__lstm = LSTM(e_length=self.__preproc.get_evidence_length(),
                       h_length=self.__preproc.get_hypothesis_length(),
                       v_size=self.__preproc.get_vector_size())
def simplefunc():
    time_steps = 10
    x_dim = 8
    hidden_dim = 8
    output_dim = 8
    n_examples = 2048
    batch_size = 256
    x = np.random.randn(time_steps, n_examples, x_dim)
    y = np.random.randn(time_steps, n_examples, output_dim)
    # the random draws above are immediately overwritten with a fixed pattern
    x = np.ones((time_steps, n_examples, x_dim))
    y = np.ones((time_steps, n_examples, output_dim)) / 4.5
    y[4:, :, :] = y[4:, :, :] * 3.6
    net = LSTM(hidden_dim, x_dim, output_dim=output_dim, learning_rate=1e-5)
    losses = []
    for i in range(5000):
        start = time.time()
        loss = 0
        for data, targets in minibatch_gen(x, y, batch_size):
            loss += np.mean(net.fit(data, targets))
        losses.append(loss)
        print('Epoch {}: loss: {} time: {}'.format(i, loss, time.time() - start),
              end='\r', flush=True)
    print('\nEpoch {}: loss: {} time: {}'.format(i, loss, time.time() - start),
          end='\r', flush=True)
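# simplefunc() above calls minibatch_gen(x, y, batch_size), which is not
# defined in this section. A minimal sketch of what it presumably does,
# assuming time-major arrays of shape (time_steps, n_examples, dim) and
# batching along the example axis; the name and signature are taken from the
# call site, the body is an assumption:
import numpy as np

def minibatch_gen(x, y, batch_size, shuffle=True):
    """Yield (data, targets) minibatches sliced along the example axis."""
    n_examples = x.shape[1]
    order = (np.random.permutation(n_examples) if shuffle
             else np.arange(n_examples))
    for start in range(0, n_examples, batch_size):
        idx = order[start:start + batch_size]
        yield x[:, idx, :], y[:, idx, :]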
def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise()):
    self.forward = LSTM(n_in, units)
    self.backward = LSTM(units, n_topics)
    self.trans = DirichletTransition(n_topics)
    self.emit = Emmission(n_topics, n_in)
    self.sparsity = sparsity
    self.noise = noise
def main():
    argv = sys.argv
    if len(argv) != 3:
        print('Usage: ' + argv[0] + ' model_name dataset')
        sys.exit(0)
    model_name = argv[1]
    data = datautils.load(argv[2])
    normalized, mean, std = datautils.normalize(data)
    normalized, _ = datautils.differentiate(normalized)
    (train, test) = datautils.split(normalized, 0.7)
    # utils.plot_data(data)
    print("training set length: {}".format(len(train)))
    print("test set length: {}".format(len(test)))

    """train"""
    model = LSTM()
    time_steps = 20  # window size
    batch_size = 5   # data augmentation
    history = model.train(model_name, train, 130, batch_size, time_steps)
    utils.plot_history(history)

    """test"""
    head = int(len(test) * 0.6)
    tail = len(test) - head
    projection = model.evaluate(model_name, test[:head], tail)

    """plot"""
    test = datautils.undifferentiate(test, sum(train))
    projection = datautils.undifferentiate(projection, sum(train) + sum(test))
    testset_denorm = datautils.denormalize(test, mean, std)
    results_denorm = datautils.denormalize(projection, mean, std)
    utils.plot_multiple([testset_denorm, results_denorm], [0, head + 1])
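# The two main() snippets above lean on datautils.differentiate and
# datautils.undifferentiate, which are not shown here. A minimal numpy sketch
# of the presumed semantics (first differences, inverted by a cumulative sum
# seeded with the level before the window). The snippets disagree on the exact
# signature (one call unpacks a pair), so this shows only the core idea:
import numpy as np

def differentiate(series, y_init=0.0):
    # first differences; the first element is series[0] - y_init
    return np.diff(np.concatenate(([y_init], series)))

def undifferentiate(diffs, y_init=0.0):
    # a cumulative sum restores the original series given the seed level
    return y_init + np.cumsum(diffs)

s = np.array([1.0, 3.0, 6.0, 10.0])
assert np.allclose(undifferentiate(differentiate(s, 0.0), 0.0), s)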
def pipeline(output_window):
    '''
    Apply training of lstm and portfolio optimization
    :param output_window: time window length of LSTM predictions
    :return:
    '''
    data_autoen = get_data(scale=True)
    train_data, test_data, train_labels, test_labels = get_train_test(
        data_autoen,
        windows_size=net_config.windows_size,
        output_window=output_window)
    # data = get_data(windows_size=net_config.windows_size)
    batches = create_batches(train_data, batch_size=net_config.batch_size)
    labels_batches = create_batches(train_labels,
                                    batch_size=net_config.batch_size)
    with tf.Graph().as_default():
        lstm_model = LSTM(input_size=data_autoen.shape[-1],
                          output_size=data_autoen.shape[-1],
                          rnn_hidden=net_config.rnn_hidden,
                          window_size=net_config.windows_size,
                          window_output=output_window,
                          learning_rate=net_config.learning_rate)
        with tf.Session(config=config_proto) as sess:
            if net_config.lstm_from_file:
                lstm_model.saver.restore(
                    sess, tf.train.latest_checkpoint('lstm-price/'))
            else:
                LSTM.train_lstm(sess, lstm_model, batches, labels_batches,
                                test_data, test_labels, net_config.lstm_epochs,
                                net_config.dropout_prob)
                lstm_model.saver.save(sess, "lstm-price/model",
                                      global_step=net_config.lstm_epochs)
            '''
            feed_dict = {
                lstm_model.inputs: test_data[0:1, :, :],
                lstm_model.dropout_prob: 1.0
            }
            preds = sess.run(lstm_model.predicted_outputs, feed_dict=feed_dict)
            n_stocks = data_autoen.shape[-1]
            fig, axes = plt.subplots(int(n_stocks / 2), 2)
            i = 0
            for n in range(int(n_stocks / 2)):
                for j in range(2):
                    axes[n, j].plot(preds[0, :, i], label='predicted')
                    axes[n, j].plot(test_labels[0, :, i], label='true')
                    i += 1
            plt.legend()
            plt.show()
            '''
            df_close = pd.DataFrame(test_data[:, -1, :])
            df_close = pd.DataFrame(scaler.inverse_transform(df_close))
            df_close = df_close.pct_change().dropna()
            train_dataset = concat(train_data, train_labels)
            all_weights, pnls = backtesting_optim_portfolio(
                df_close, test_data, test_labels, train_dataset, lstm_model,
                sess, net_config.risk_aversion, output_window, net_config)
            print(all_weights.shape)
            performances = compute_performance(pnls)
            print(performances)
            return np.array(performances), all_weights, pnls
def __init__(self, args):
    super().__init__()
    self.features = Features(args)
    self.rnn1 = LSTM(Features.size + args.button_num, self.rnn_size)
    self.rnn2 = LSTM(self.rnn_size, self.rnn_size)
    self.features_bn = nn.BatchNorm1d(Features.size + args.button_num)
    self.pred = nn.Linear(self.rnn_size, args.variable_num * 3)
def compare():
    obstacle, wall_cw, wall_awc = Obstacle(), WallCW(), WallACW()
    obstacle_params, wall_cw_params, wall_acw_params = (
        torch.load('program_memory/move.pt'),
        torch.load('program_memory/cw.pt'),
        torch.load('program_memory/acw.pt'))
    networks = [obstacle, wall_cw, wall_awc]
    params = [obstacle_params, wall_cw_params, wall_acw_params]
    hnm = HNM(10, 14, networks, params)
    hnm.load_state_dict(torch.load('learned_params/hnm.pt'))
    ntm = NTM(10, 14)
    ntm.load_state_dict(torch.load('learned_params/ntm.pt'))
    lstm = LSTM(14, 64, 3, 1)
    lstm.load_state_dict(torch.load('learned_params/lstm.pt'))
    testX, testY = getTestData()
    hnm_correct, ntm_correct, lstm_correct = 0, 0, 0
    totSamples = 0
    for i in range(0, 25):
        s = torch.from_numpy(np.array(testX[i:i + 1][0])).float().unsqueeze(0)
        s_lstm = s.view(s.size()[0], s.size()[2], -1)
        l = np.array(testY[i:i + 1][0])
        print(i)
        (hnm_read_weights, hnm_write_weights) = hnm._initialise()
        (ntm_read_weights, ntm_write_weights) = ntm._initialise()
        lstm_h = lstm.h0.expand(s_lstm.size()[0], 64)
        lstm_c = lstm.c0.expand(s_lstm.size()[0], 64)
        for j in range(s.size()[1]):
            (hnm_out, hnm_read_weights, hnm_write_weights) = hnm.forward(
                s[:, j, :], hnm_read_weights, hnm_write_weights)
            (ntm_out, ntm_read_weights, ntm_write_weights) = ntm.forward(
                s[:, j, :], ntm_read_weights, ntm_write_weights)
            lstm_h, lstm_c, lstm_out = lstm.forward(s_lstm[:, :, j],
                                                    lstm_h, lstm_c)
            if np.argmax(hnm_out.detach().numpy()) == np.argmax(l[j]):
                hnm_correct += 1
            if np.argmax(ntm_out.detach().numpy()) == np.argmax(l[j]):
                ntm_correct += 1
            if np.argmax(lstm_out.detach().numpy()) == np.argmax(l[j]):
                lstm_correct += 1
            totSamples += 1
    print(hnm_correct, ntm_correct, lstm_correct)
    print(totSamples)
def __init__(self):
    self.dropout_rate = DROPOUT_RATE
    self.psrc_lookup = Parameter()
    self.ptrg_lookup = Parameter()
    self.pwhy = Parameter()
    self.pby = Parameter()
    self.src_lstm = LSTM()
    self.trg_lstm = LSTM()
    self.scan_attributes()
def create_lstm(self):
    """Creates all neural networks and the vanilla lstm"""
    nn_f = self.create_neural_network(layer.SigmoidLayer(), constants.SIG_F_POS)
    nn_i = self.create_neural_network(layer.SigmoidLayer(), constants.SIG_I_POS)
    nn_c = self.create_neural_network(layer.TanhLayer(), constants.TANH_C_POS)
    nn_o = self.create_neural_network(layer.SigmoidLayer(), constants.SIG_O_POS)
    self.lstm = LSTM(nn_f, nn_i, nn_c, nn_o, constants.TAU_QUANTILE,
                     constants.LEARNING_RATE)
def quandltest():
    LOAD = False
    tickers = ['INTC', 'AMD']
    date = {'gte': '2016-10-10', 'lte': '2017-09-01'}
    columns = {'columns': ['ticker', 'date', 'close']}
    datasets = []
    for ticker in tickers:
        datasets.append(
            quandl.get_table('WIKI/PRICES', qopts=columns, ticker=ticker,
                             date=date))
    for dataset in datasets:
        dataset.rename(columns={'close': dataset['ticker'].iloc[0]},
                       inplace=True)
        dataset.drop('ticker', axis=1, inplace=True)
    df = reduce(lambda l, r: pd.merge(l, r, on='date'), datasets)
    df.index = df['date']
    df.drop('date', axis=1, inplace=True)
    df = (df / df.iloc[-1]).diff()[1:]
    x_dim, output_dim = len(df.iloc[0]), len(df.iloc[0])
    hidden_dim = 80
    time_steps = 30
    batch_size = 20
    if LOAD:
        with open('amd_intel_net.pkl', 'rb') as f:
            net = pickle.load(f)
    else:
        net = LSTM(hidden_dim, x_dim, output_dim=output_dim,
                   learning_rate=2e-2)
    for i in range(300000):
        batch = np.array(
            random_time_batch(df, time_steps=time_steps + 1,
                              batch_size=batch_size))
        start = time.time()
        x = batch[:-1, :, :]
        y = batch[1:, :, :]
        loss = np.sum(net.fit(x, y))
        print('Epoch {}: loss: {} time: {}'.format(i, loss, time.time() - start),
              end='\r', flush=True)
        if i != 0 and i % 5000 == 0:
            with open('amd_intel_net.pkl', 'wb') as f:
                pickle.dump(net, f)
        if i % 3000 == 0:
            net.cell.learning_rate = net.cell.learning_rate * 0.5
            net.activation.learning_rate = net.activation.learning_rate * 0.5
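# quandltest() above uses random_time_batch, which is not defined in this
# section. A plausible sketch, assuming it samples random windows of
# consecutive rows from the DataFrame and stacks them time-major; the name
# and keyword arguments come from the call site, the body is an assumption:
import numpy as np

def random_time_batch(df, time_steps, batch_size):
    """Sample `batch_size` random windows of `time_steps` consecutive rows."""
    starts = np.random.randint(0, len(df) - time_steps + 1, size=batch_size)
    windows = [df.values[s:s + time_steps] for s in starts]
    # stack to shape (time_steps, batch_size, n_features)
    return np.stack(windows, axis=1)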
def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise(),
             trans_weight=1.0):
    self.forward = LSTM(n_in, units)
    self.backward = LSTM(units, n_topics)
    self.linear = Linear(n_topics, n_topics)
    self.trans = DirichletTransition(n_topics)
    self.emit = Emmission(n_topics, n_in)
    self.sparsity = sparsity
    self.noise = noise
    self.n_topics = n_topics
    self.n_in = n_in
    self.trans_weight = trans_weight
def _build_model(self):
    """Build the training model.

    First embed user/item inputs into the training dimension, then feed them
    into LSTM cells. After the LSTM, affine-transform the matrices into the
    emission dimension and emit the prediction.
    """
    phase = 'ENCODE'
    with tf.variable_scope(phase):
        self.encode_user = Transform(self.user_input, self.user_hparas, phase)
        self.encode_item = Transform(self.item_input, self.item_hparas, phase)
    phase = 'LSTM'
    with tf.variable_scope(phase):
        self.lstm_user = LSTM(self.encode_user.output, self.user_hparas)
        self.lstm_item = LSTM(self.encode_item.output, self.item_hparas)
    phase = 'AFFINE'
    with tf.variable_scope(phase):
        self.trans_user = Transform(self.lstm_user.output, self.user_hparas,
                                    phase)
        self.trans_item = Transform(self.lstm_item.output, self.item_hparas,
                                    phase)
    phase = 'EMISSION'
    with tf.variable_scope(phase):
        self.dynamic_state = tf.einsum('ijl,kjl->jik',
                                       self.trans_user.output,
                                       self.trans_item.output,
                                       name='dynamic_state')
        self.stationary_state = tf.matmul(self.user_stationary_factor,
                                          self.item_stationary_factor,
                                          transpose_b=True,
                                          name='stationary_state')
        if self.loss_function == 'log_loss':
            logits = tf.add(self.dynamic_state * 0.5,
                            self.stationary_state * 0.5,
                            name='logits')
            self.logits = tf.nn.sigmoid(logits, name='logits_activation')
        elif self.loss_function == 'rmse':
            logits = tf.add(self.dynamic_state, self.stationary_state,
                            name='logits')
            self.logits = tf.nn.relu(logits, name='logits_activation')
        else:
            raise NotImplementedError(
                "Didn't implement the loss function yet.")
    self.logits_last = self.logits[-1, :, :]
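# A small numpy check of the einsum used for dynamic_state above, assuming
# (as the 'ijl,kjl->jik' subscripts imply) that the affine outputs are shaped
# (n_users, time, dim) and (n_items, time, dim); the dimension names here are
# illustrative, not taken from the model code:
import numpy as np

U, I, T, D = 3, 4, 5, 6
user = np.random.randn(U, T, D)
item = np.random.randn(I, T, D)
state = np.einsum('ijl,kjl->jik', user, item)
# per-timestep user-item inner products, shaped (time, n_users, n_items)
assert state.shape == (T, U, I)
assert np.isclose(state[2, 1, 3], user[1, 2].dot(item[3, 2]))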
def __init__(self):
    self.model_lstm = LSTM(3136, 1045)
    # select the directory where the trained parameters were saved
    d_name = 'lstm_params/1epoch_params/'
    self.model_lstm.l1_x.W = np.load(d_name + 'l1_x_W.npy')
    self.model_lstm.l1_x.b = np.load(d_name + 'l1_x_b.npy')
    self.model_lstm.l1_h.W = np.load(d_name + 'l1_h_W.npy')
    self.model_lstm.l1_h.b = np.load(d_name + 'l1_h_b.npy')
    self.model_lstm.l6.W = np.load(d_name + 'l6_W.npy')
    self.model_lstm.l6.b = np.load(d_name + 'l6_b.npy')
    cuda.get_device(0).use()
    self.model_lstm = self.model_lstm.to_gpu()
class TopicLSTM(object):
    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise(),
                 trans_weight=1.0):
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.linear = Linear(n_topics, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise
        self.n_topics = n_topics
        self.n_in = n_in
        self.trans_weight = trans_weight

    @property
    def weights(self):
        return (self.forward.weights + self.backward.weights
                + self.trans.weights + self.emit.weights)

    def transform(self, X, mask=None):
        Z_f, _ = self.forward.scanl(X, mask=mask)
        Z, _ = self.backward.scanr(Z_f, mask=mask)  # , activation=softmax)
        return logsoftmax(self.linear(Z))

    def loss(self, X, mask=None, flank=0, Z=None):
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        E = self.emit(Z)
        L = cross_entropy(E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n - flank], C[flank:n - flank]

    def gradient(self, X, mask=None, flank=0):
        Z = self.transform(self.noise(X), mask=mask)
        n = Z.shape[0]
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)  # / self.n_in
        Tr = self.trans(Z)
        if mask is not None:
            Tr *= mask
        if self.trans_weight > 0:
            loss -= self.trans_weight * T.sum(Tr[flank:n - flank])  # / self.n_topics
        m = n - 2 * flank
        # loss += self.trans.regularizer() * m / self.n_topics
        if self.sparsity > 0:
            R = self.sparsity * Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n - flank])
        gW = theano.grad(loss, self.weights, disconnected_inputs='warn')
        return gW, [L.sum(axis=[0, 1]), C.sum(axis=[0, 1])]
def train_lstm(train, test,
               model_parameters=[4, 'mean_squared_error', 'adam', 100, 1]):
    history = int(model_parameters[0])
    loss = model_parameters[1]
    optimizer = model_parameters[2]
    epochs = int(model_parameters[3])
    batch_size = int(model_parameters[4])
    lstm = LSTM(train, history, loss, optimizer)
    lstm.lstm_train(epochs, batch_size)
    real_label, predicted_label = lstm.lstm_predict(test)
    print_report('LSTM', real_label, predicted_label)
def trainLSTM():
    lstm = LSTM(14, 64, 3, 1)
    X, y = [], []
    for i in range(10):
        tempX, tempY = getData("data/observations_" + str(i * 500) + ".npy",
                               "data/actions_" + str(i * 500) + ".npy")
        X.extend(tempX)
        y.extend(tempY)
    print(len(X), len(y))
    lstm.train(X, y, maxEpoch=10, learning_rate=0.0006, mini_batch_size=1)
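# trainLSTM() above depends on getData, not shown in this section. A minimal
# sketch under the assumption that the two .npy files hold parallel arrays of
# observations and actions; the name and argument order come from the call
# site, the body is an assumption:
import numpy as np

def getData(obs_path, act_path):
    """Load parallel observation/action arrays and return them as lists."""
    return list(np.load(obs_path)), list(np.load(act_path))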
def __init__(self, name, src_vocab_size, trg_vocab_size, embed_size,
             hidden_size, dropout_rate):
    self.name_ = name
    self.dropout_rate_ = dropout_rate
    self.psrc_lookup_ = Parameter([embed_size, src_vocab_size],
                                  I.XavierUniform())
    self.ptrg_lookup_ = Parameter([embed_size, trg_vocab_size],
                                  I.XavierUniform())
    self.pwhy_ = Parameter([trg_vocab_size, hidden_size], I.XavierUniform())
    self.pby_ = Parameter([trg_vocab_size], I.Constant(0))
    self.src_lstm_ = LSTM(name + "_src_lstm", embed_size, hidden_size)
    self.trg_lstm_ = LSTM(name + "_trg_lstm", embed_size, hidden_size)
def forecast_lstm(actions):
    lstm = LSTM(CONFIG)
    model = lstm.load_model()
    vocabulary = restore_vocabulary()
    actions_scores = readScores(CONFIG)
    previous_action = None
    for action in actions:
        if action != '':
            # Compare the previous action with the incoming action.
            action = json.loads(action)
            incoming_action = action_to_vector(action, vocabulary)
            if previous_action is None:
                previous_action = incoming_action
                continue
            score = getScore(actions_scores, action['name'])
            previous_action_transformed = lstm.pretransform_dataset(
                [previous_action], reshape=True)
            incoming_action_transformed = lstm.pretransform_dataset(
                [incoming_action], reshape=True)
            predicted = lstm.forecast(model, previous_action_transformed,
                                      incoming_action_transformed)
            # Print the anomaly score, then make the incoming action the
            # previous one.
            print(lstm.calculate_score(incoming_action_transformed, predicted,
                                       score))
            previous_action = incoming_action
def load(name, prefix):
    encdec = EncoderDecoder.__new__(EncoderDecoder)
    encdec.name_ = name
    encdec.psrc_lookup_ = Parameter.load(prefix + name + "_src_lookup.param")
    encdec.ptrg_lookup_ = Parameter.load(prefix + name + "_trg_lookup.param")
    encdec.pwhy_ = Parameter.load(prefix + name + "_why.param")
    encdec.pby_ = Parameter.load(prefix + name + "_by.param")
    encdec.src_lstm_ = LSTM.load(name + "_src_lstm", prefix)
    encdec.trg_lstm_ = LSTM.load(name + "_trg_lstm", prefix)
    with open(prefix + name + ".config", "r") as ifs:
        encdec.dropout_rate_ = float(ifs.readline())
    return encdec
def __init__(self,
             feature_config_path,
             dense_feature_config_path='',
             notes_feature_name='',
             use_notes_model=False,
             notes_model_params=None,
             variational_vocab_keep_prob=1.0,
             notes_vocab_keep_prob=1.0,
             notes_max_length=-1,
             notes_num_splits=0,
             bagging_timerange=3600,
             bagging_aggregate_older_than=3600000,
             is_training=True):
    """Params:
      feature_config_path: path to embedding config for discrete features.
      dense_feature_config_path: path to config file with statistics for
        continuous features.
      notes_feature_name: name of notes feature in inputs.
      use_notes_model: whether to run an LSTM on the notes.
      notes_model_params: dict with parameters for the notes LSTM.
      variational_vocab_keep_prob: vocabulary dropout rate for features other
        than notes.
      notes_vocab_keep_prob: vocabulary dropout rate for notes.
      notes_max_length: number of words to retain from notes per example.
      notes_num_splits: number of additional GPUs to distribute the notes
        LSTM across.
      bagging_timerange: length of timesteps to aggregate into a single bag
        (in seconds).
      bagging_aggregate_older_than: length of time before prediction beyond
        which all observations should be aggregated into a single bag (in
        seconds).
      is_training: whether model is in training or eval phase.
    """
    self._is_training = is_training
    self._feature_config_path = feature_config_path
    self._dense_feature_config_path = dense_feature_config_path
    self._notes_feature_name = notes_feature_name
    self._use_notes_model = use_notes_model
    self._variational_vocab_keep_prob = variational_vocab_keep_prob
    self._notes_vocab_keep_prob = notes_vocab_keep_prob
    self._notes_max_length = notes_max_length
    self._notes_num_splits = notes_num_splits
    self._bagging_timerange = bagging_timerange
    self._bagging_aggregate_older_than = bagging_aggregate_older_than
    self._notes_model = None
    if use_notes_model:
        self._notes_model_dim = notes_model_params['model_dim']
        self._notes_bidirectional = notes_model_params['bidirectional']
        self._notes_model = LSTM(**notes_model_params)
def forward(self, xs):
    Wx, Wh, b = self.params
    N, T, D = xs.shape
    H = Wh.shape[0]
    self.layers = []  # reset the per-timestep cells on every call
    hs = np.empty((N, T, H), dtype='f')
    if not self.stateful or self.h is None:
        self.h = np.zeros((N, H), dtype='f')
    if not self.stateful or self.c is None:
        self.c = np.zeros((N, H), dtype='f')
    for t in range(T):
        layer = LSTM(*self.params)
        self.h, self.c = layer.forward(xs[:, t, :], self.h, self.c)
        hs[:, t, :] = self.h
        self.layers.append(layer)
    return hs
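# A sketch of the single-timestep cell that layer.forward above presumably
# computes, assuming the common "four gates stacked" parameterization with
# Wx: (D, 4H), Wh: (H, 4H), b: (4H,); the gate ordering (f, g, i, o) is an
# assumption, not taken from the class shown above:
import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev, Wx, Wh, b):
    H = h_prev.shape[1]
    A = x.dot(Wx) + h_prev.dot(Wh) + b   # one affine for all gates, (N, 4H)
    f = sigmoid(A[:, :H])                # forget gate
    g = np.tanh(A[:, H:2 * H])           # candidate cell state
    i = sigmoid(A[:, 2 * H:3 * H])       # input gate
    o = sigmoid(A[:, 3 * H:])            # output gate
    c_next = f * c_prev + g * i
    h_next = o * np.tanh(c_next)
    return h_next, c_next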
def train_lstm():
    lstm = LSTM(CONFIG)
    vocabulary = create_and_save_vocabulary(LOG_FILE)
    print("Start preprocessing data")
    iter_generator = create_iter_generator(LOG_FILE)
    actions_vectorized = []
    for i, row in enumerate(iter_generator):
        action_vector = action_to_vector(row, vocabulary)
        actions_vectorized.append(action_vector)
    print("End preprocessing data")
    model = lstm.get_model()
    rmse = lstm.train_on_dataset(actions_vectorized, model)
def main():
    images, labels = load_mnist("D:/Computer Science/Github/Mnist-tensorflow/")
    images_test, labels_test = load_mnist(
        "D:/Computer Science/Github/Mnist-tensorflow/", "t10k")
    # lenet = Lenet(images, labels, images_test, labels_test, 0.5, 100, 20000)
    # lenet.train()
    # alexnet = Alexnet(images, labels, images_test, labels_test, 0.5, 100, 300)
    # alexnet.train()
    # vgg16 = VGG16(images, labels, images_test, labels_test, 0.5, 100, 300)
    # vgg16.train()
    lstm = LSTM(images, labels, images_test, labels_test, keep_pb=0.5,
                batch_size=100, epoch_size=500)
    lstm.train_network()
def run_service():
    stocks = StockHelper.get_stock_symbol_mapping()
    for stock, symbol in stocks.items():
        logger.info(f"Starting training for {stock} [{symbol}] at {ctime()}")
        models = {
            "SVM": SVM(symbol, scaler=StandardScaler),
            "ARIMA": ARIMA(symbol, scaler=LogScaler),
            "LSTM": LSTM(symbol, scaler=MinMaxScaler, is_keras=True),
        }
        for model_name, model in models.items():
            logger.info(f"\tTraining {model_name} for {stock}")
            start_time = time()
            train_data = model.train_data
            n_days = 300
            test_data = Series(index=get_next_n_trading_days(n_days))
            predictions = model.fit_predict(n_days)
            predictions = Series(data=predictions,
                                 index=test_data.index[:len(predictions)])
            save_predictions(predictions, type(model).__name__, symbol,
                             train_data.index.max().to_pydatetime())
            logger.info(f"\tTrained {model_name} for {stock} in "
                        f"{time() - start_time:.3f} seconds")
        logger.info(f"Finished training for {stock} [{symbol}] at {ctime()}")
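# run_service() above relies on get_next_n_trading_days, not defined in this
# section. A minimal sketch using pandas business days; this is a
# simplification, since a real trading calendar would also exclude exchange
# holidays, and the optional start parameter is an assumption:
import pandas as pd

def get_next_n_trading_days(n, start=None):
    """Return a DatetimeIndex of the next n business days."""
    start = start or pd.Timestamp.today().normalize() + pd.Timedelta(days=1)
    return pd.bdate_range(start=start, periods=n)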
def load_model(modelConfig, dataConfig, data):
    # Set the number of cores for TensorFlow to use
    try:
        tf_config = tf.ConfigProto(
            inter_op_parallelism_threads=int(modelConfig['NumCores']),
            intra_op_parallelism_threads=int(modelConfig['NumCores']))
    except KeyError:
        tf_config = tf.ConfigProto()
        # warn (rather than raise) so the default config above is actually used
        warnings.warn(
            "Number of cores to use not specified! "
            "Setting to TensorFlow default.")
    with tf.Graph().as_default(), tf.Session(config=tf_config) as session:
        with tf.variable_scope('Model', reuse=None):
            m = LSTM(False, modelConfig, dataConfig)
        saver = tf.train.Saver()
        saver.restore(session, modelConfig['InputDirectory'])
        print('Model successfully restored!')
        data.prepBatches(m.batch_size, m.num_steps)
        ypred, _, _, _ = run_epoch(session, m, data, tf.no_op())
        return ypred
def build(self):
    print('\t building rnn cell...')
    if self.cell == 'gru':
        hidden_layer = GRU(self.rng,
                           self.n_input, self.n_hidden, self.n_batch,
                           self.x, self.E, self.x_mask,
                           self.is_train, self.p)
    else:
        hidden_layer = LSTM(self.rng,
                            self.n_input, self.n_hidden, self.n_batch,
                            self.x, self.E, self.x_mask,
                            self.is_train, self.p)
    print('\t building softmax output layer...')
    softmax_shape = (self.n_hidden, self.n_output)
    output_layer = H_Softmax(softmax_shape,
                             hidden_layer.activation,
                             self.y_node, self.y_choice,
                             self.y_bit_mask, self.y_mask)
    self.params = [self.E, ]
    self.params += hidden_layer.params
    self.params += output_layer.params
    cost = output_layer.activation
    lr = T.scalar("lr")
    gparams = [T.clip(T.grad(cost, p), -10, 10) for p in self.params]
    updates = sgd(self.params, gparams, lr)
    self.train = theano.function(
        inputs=[self.x, self.x_mask,
                self.y_node, self.y_choice, self.y_bit_mask, self.y_mask,
                self.n_batch, lr],
        outputs=cost,
        updates=updates,
        givens={self.is_train: np.cast['int32'](1)})
    self.test = theano.function(
        inputs=[self.x, self.x_mask,
                self.y_node, self.y_choice, self.y_bit_mask, self.y_mask,
                self.n_batch],
        outputs=cost,
        givens={self.is_train: np.cast['int32'](0)})
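# build() above calls sgd(params, gparams, lr), which is not shown in this
# section. A minimal Theano-style sketch of the presumed update rule: plain
# gradient descent, expressed as the (shared_variable, new_value) pairs that
# theano.function(updates=...) expects. The body is an assumption:
def sgd(params, gparams, lr):
    """One plain SGD update pair per parameter."""
    return [(p, p - lr * g) for p, g in zip(params, gparams)]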
class Predictor(object):
    def __init__(self, obs_length=5, pred_length=3):
        self.obs_length = obs_length
        self.pred_length = pred_length
        # Load the saved arguments to the model from the config file
        with open(os.path.join('save_lstm', 'config.pkl'), 'rb') as f:
            saved_args = pickle.load(f)
        # Initialize with the saved args
        self.model = LSTM(saved_args, True)
        self.sess = tf.InteractiveSession()
        saver = tf.train.Saver()
        # Get the checkpoint state to load the model from
        ckpt = tf.train.get_checkpoint_state('save_lstm')
        print('loading model: ', ckpt.model_checkpoint_path)
        # Restore the model at the checkpoint
        saver.restore(self.sess, ckpt.model_checkpoint_path)

    def predict(self, path, full=False):
        obs_traj = path[:self.obs_length]  # observed part of the trajectory
        # Get the complete trajectory, both the observed and the predicted
        # parts, from the model
        predicted_traj, mu, var = self.model.sample(self.sess, obs_traj,
                                                    num=self.pred_length,
                                                    full=full)
        if full:
            return predicted_traj, mu, var
        else:
            return predicted_traj
def main(nn_type, data_type):
    print(" Starting... ")
    filename_train = 'qa1_single-supporting-fact_train.txt'
    filename_test = 'qa1_single-supporting-fact_test.txt'
    directory = 'data/babi_tasks/tasks_1-20_v1-2/en/'
    num_epochs = 500
    # processor = Preprocessor(directory, filename_train, filename_test, data_type)
    # X_train, y_train, mask_train, X_test, y_test, mask_test, input_size, max_seq_len, idx2word = processor.extract_data()
    # wProc = WikiProcessor('C:/Users/Dan/Desktop/Crore/6.864/Project/Data/wiki_qa/')
    # wProc.process()
    # proc = CNNProcessor()
    # proc.process()
    if nn_type == "lstm":
        proc = BabiProcessor(data_type)
        (X_train, y_train, mask_train, X_test, y_test, mask_test,
         input_size, max_seq_len, idx2word) = proc.process()
        lstm = LSTM(X_train, y_train, mask_train, X_test, y_test, mask_test,
                    idx2word)
        network, l_mask, l_in = lstm.build_model(input_size, max_seq_len)
        lstm.optimize(network, l_mask, l_in)
    elif nn_type == "mem_net" and data_type == "babi":
        mn = MemNet()
        mn.run('babi')
    elif nn_type == "mem_net" and data_type == "wiki_qa":
        mn = MemNet()
        mn.run(data_type)
    elif nn_type == "mem_net" and data_type == "cnn":
        mn = MemNet()
        mn.run('cnn_qa')
    elif nn_type == "dynam_net":
        proc = BabiProcessor(data_type, "dynam_net")
        (X_train, Q_train, Y_train, mask_train, X_test, Q_test, Y_test,
         mask_test, input_size, max_seqlen, idx2word,
         max_queslen) = proc.process()
        dn = DynamicMemNet(X_train, Q_train, Y_train, mask_train,
                           X_test, Q_test, Y_test, mask_test,
                           input_size, max_seqlen, idx2word, max_queslen)
        dn.build()
        dn.train()
    elif nn_type == "dynam_net_theano":
        # num_fact_hidden_units, number_classes, number_fact_embeddings,
        # dimension_fact_embeddings, num_episode_hidden_units
        dmn_t = DMN_full_babi()
        dmn_t.train()
        print("Finished DMN Theano")
class TopicLSTM(object):
    def __init__(self, n_in, units, n_topics, sparsity=0, noise=NullNoise()):
        self.forward = LSTM(n_in, units)
        self.backward = LSTM(units, n_topics)
        self.trans = DirichletTransition(n_topics)
        self.emit = Emmission(n_topics, n_in)
        self.sparsity = sparsity
        self.noise = noise

    @property
    def weights(self):
        return (self.forward.weights + self.backward.weights
                + self.trans.weights + self.emit.weights)

    def transform(self, X, mask=None):
        Z_f = self.forward.scanl(X, mask=mask)
        Z = self.backward.scanr(Z_f, mask=None, activation=logsoftmax)
        return Z

    def loss(self, X, mask=None, flank=0, Z=None):
        if Z is None:
            Z = self.transform(self.noise(X), mask=mask)
        Tr = self.trans(Z)
        E = self.emit(Z)
        L = cross_entropy(T.shape_padright(Tr) + E, X)
        C = confusion(T.argmax(E, axis=-1), X, E.shape[-1])
        if mask is not None:
            L *= T.shape_padright(mask)
            C *= T.shape_padright(T.shape_padright(mask))
        n = X.shape[0]
        return L[flank:n - flank], C[flank:n - flank]

    def gradient(self, X, mask=None, flank=0):
        Z = self.transform(self.noise(X), mask=mask)
        L, C = self.loss(X, mask=mask, flank=flank, Z=Z)
        loss = T.sum(L)
        n = Z.shape[0]
        if self.sparsity > 0:
            R = self.sparsity * Z
            if mask is not None:
                R *= T.shape_padright(mask)
            loss += T.sum(R[flank:n - flank])
        gW = theano.grad(loss, self.weights)
        return gW, [L, C]
train_data, valid_data = create_valid(zip(train_x, train_y, train_mask))
num_classes = len(label2idx)
embedding = load_embedding(FLAGS.embedding_size, filename=FLAGS.embedding_file)
test_x, test_y, test_mask = load_data(FLAGS.test_file, word2idx, label2idx,
                                      FLAGS.sequence_len)
logging.info("load test data finish")
# ----------------------------------- load data end ----------------------
# ----------------------------------- execute train ----------------------
with tf.Graph().as_default():
    with tf.device("/cpu:0"):
        gpu_options = tf.GPUOptions(
            per_process_gpu_memory_fraction=FLAGS.gpu_options)
        session_conf = tf.ConfigProto(
            allow_soft_placement=FLAGS.allow_soft_placement,
            log_device_placement=FLAGS.log_device_placement,
            gpu_options=gpu_options)
        with tf.Session(config=session_conf).as_default() as sess:
            initializer = tf.random_uniform_initializer(
                -1 * FLAGS.init_scale, 1 * FLAGS.init_scale)
            with tf.variable_scope("model", reuse=None,
                                   initializer=initializer):
                model = LSTM(FLAGS.batch_size, FLAGS.sequence_len, embedding,
                             FLAGS.embedding_size, FLAGS.attention_dim,
                             FLAGS.rnn_size, FLAGS.num_rnn_layers,
                             num_classes, FLAGS.max_grad_norm,
                             dropout=FLAGS.dropout, is_training=True)
            with tf.variable_scope("model", reuse=True,
                                   initializer=initializer):
                valid_model = LSTM(FLAGS.batch_size, FLAGS.sequence_len,
                                   embedding, FLAGS.embedding_size,
                                   FLAGS.attention_dim, FLAGS.rnn_size,
                                   FLAGS.num_rnn_layers, num_classes,
                                   FLAGS.max_grad_norm, is_training=False)
                test_model = LSTM(FLAGS.batch_size, FLAGS.sequence_len,
                                  embedding, FLAGS.embedding_size,
                                  FLAGS.attention_dim, FLAGS.rnn_size,
                                  FLAGS.num_rnn_layers, num_classes,
                                  FLAGS.max_grad_norm, is_training=False)
            # add summary
            train_summary_dir = os.path.join(FLAGS.out_dir, "summaries",
                                             "train")
            train_summary_writer = tf.train.SummaryWriter(train_summary_dir,
                                                          sess.graph)
            # add checkpoint
            checkpoint_dir = os.path.abspath(
                os.path.join(FLAGS.out_dir, "checkpoints"))
            checkpoint_prefix = os.path.join(checkpoint_dir, "model")
            if not os.path.exists(checkpoint_dir):
                os.makedirs(checkpoint_dir)
            saver = tf.train.Saver(tf.all_variables())
def test_LSTM():
    T = 5
    batch_size = 2
    nstates = 5
    input_size = 4
    unit = LSTM(input_size, nstates)
    W = unit.get_weights()
    X = np.random.randn(T, input_size, batch_size)
    unit.forget()
    acc_Y = unit.forward(X)
    wrand = np.random.randn(*acc_Y.shape)
    loss = np.sum(acc_Y * wrand)
    dY = wrand
    dX = unit.backward(dY)
    dW = unit.get_grads()
    unit.forget()

    def fwd():
        unit.set_weights(W)
        h = unit.forward(X)
        unit.forget()
        return np.sum(h * wrand)

    delta = 1e-4
    error_threshold = 1e-3
    all_values = [X, W]
    backpropagated_gradients = [dX, dW]
    names = ['X', 'W']
    error_count = 0
    for v in range(len(names)):
        values = all_values[v]
        dvalues = backpropagated_gradients[v]
        name = names[v]
        for i in range(values.size):
            actual = values.flat[i]
            values.flat[i] = actual + delta
            loss_plus = fwd()
            values.flat[i] = actual - delta
            loss_minus = fwd()
            values.flat[i] = actual
            backpropagated_gradient = dvalues.flat[i]
            # central difference: (f(x + delta) - f(x - delta)) / (2 * delta)
            numerical_gradient = (loss_plus - loss_minus) / (2 * delta)
            if numerical_gradient == 0 and backpropagated_gradient == 0:
                error = 0
            elif (abs(numerical_gradient) < 1e-7
                  and abs(backpropagated_gradient) < 1e-7):
                error = 0
            else:
                error = (abs(backpropagated_gradient - numerical_gradient)
                         / abs(numerical_gradient + backpropagated_gradient))
            if error > error_threshold:
                print('FAILURE!!!\n')
                print('\tparameter: ', name,
                      '\tindex: ', np.unravel_index(i, values.shape))
                print('\tvalues: ', actual)
                print('\tbackpropagated_gradient: ', backpropagated_gradient)
                print('\tnumerical_gradient', numerical_gradient)
                print('\terror: ', error)
                print('\n\n')
                error_count += 1
    if error_count == 0:
        print('LSTM Gradient Check Passed')
    else:
        print('Failed for {} parameters'.format(error_count))
idxEnd = idxBegin + nIter
# Filenames.
modelBaseName = "lstm-model--id_{0}-batch_{1}-seq_{2}-lr_{3}-nh_{4}".format(
    youTubeId, batchSize, sequenceSeconds, learningRate, hiddenUnits)
modelFileName = modelBaseName + ".pkl"
graphFileName = modelBaseName + ".png"
soundFileName = modelBaseName + ".wav"
vals = []
error = np.array([0])
minError = np.inf
idx = 0
scaling = 0
# create LSTM
lstm = LSTM(miniBatches, hiddenUnits, miniBatches)
# retrieve datastream
print("retrieving data...")
data = YouTubeAudio(youTubeId)
stream = data.get_example_stream()
data_stream = Window(stride, sequenceSize, sequenceSize, True, stream)
# switch to configure training or audio generation
if mode == "train":
    print("training begin...")
    print("Input Size:", batchSize)
    print("minibatches:", miniBatches)
    print("stride:", stride)
    print("hidden units:", hiddenUnits)
def main():
    n_steps = 5
    n_epochs = 1000
    n_data = 1000
    n_valid_data = 20

    # Generate some data.
    data_x, data_y = generate_data(n_data, tricky=True)
    logging.info("First 10 data samples:")
    logging.info(list(zip(*data_x))[:10])
    logging.info(data_y[:10])
    validation_data_x, validation_data_y = generate_data(n_valid_data,
                                                         tricky=True)
    lstm = LSTM(learning_rate=0.1)
    lstm.build_train()
    s0 = np.zeros((n_data, lstm.n_cells), dtype=np.float32)
    s0_valid = np.zeros((n_valid_data, lstm.n_cells), dtype=np.float32)
    for e in range(1000):
        (loss, ) = lstm.train_step(s0, data_x, data_y)
        validation_data_est, validation_loss = lstm.validation_loss(
            s0_valid, validation_data_x, validation_data_y)
        logging.info("Epoch #%d: loss(%.5f) valid_loss(%.5f)"
                     % (e, loss, validation_loss))
        res_seq = enumerate(zip(validation_data_est[0], validation_data_y))
        for i, (y_est, y_t) in res_seq:
            logging.info("%d: lbl(%.2f) clf(%.2f)" % (i, y_est, y_t))
    # for i, (x, y) in enumerate(zip(validation_data_x, validation_data_y)):
    return

    # Everything below is unreachable: an older manual training loop kept
    # for reference.
    clf = lstm.build_clf_model(lstm.process_input())
    f_clf = function([lstm.s0, lstm.n_steps, lstm.x], clf)
    """
    x = np.ones((5, lstm.input_size), dtype=np.float32)
    x[0,0] = 0
    x[1,0] = 1
    x[2,0] = 2
    x[3,0] = 3
    x[4,0] = 4
    print f_clf(
        np.zeros((lstm.n_cells, ), dtype=np.float32),
        5,
        x
    )
    return"""
    loss = lstm.build_loss(clf)
    f_loss = function([lstm.s0, lstm.n_steps, lstm.x, lstm.tgt], loss)
    input = np.ndarray((lstm.input_size), dtype=np.float32)
    input[:] = 1.0
    loss_prime = theano.grad(loss, wrt=lstm.params.values())
    f_loss_prime = function([lstm.s0, lstm.n_steps, lstm.x, lstm.tgt],
                            loss_prime)
    for e in range(n_epochs):
        logging.info("Epoch #%d" % e)
        g = {}
        total_loss = 0.0
        for x, y, n_steps in data:
            g_point = f_loss_prime(
                np.zeros((lstm.n_cells, ), dtype=np.float32), n_steps, x, y)
            total_loss += f_loss(
                np.zeros((lstm.n_cells, ), dtype=np.float32),
                n_steps, x, y) * 1.0 / len(data)
            for i in range(len(g_point)):
                if i not in g:
                    g[i] = np.zeros_like(g_point[i], dtype=np.float32)
            for i in range(len(g_point)):
                g[i] += g_point[i]  # * 1.0 / len(data)
        validation_loss = 0.0
        for i, (x, y, n_steps) in enumerate(validation_data):
            args = [np.zeros((lstm.n_cells, ), dtype=np.float32),
                    n_steps, x, y]
            validation_loss += f_loss(*args) * 1.0 / len(validation_data)
            if e % 50 == 0:
                logging.info("%d: tgt(%.0f) clf(%.2f)"
                             % (i, y, f_clf(*args[:-1])))
        logging.info("train_loss(%.5f) valid_loss(%.5f)"
                     % (total_loss, validation_loss))
        update(lstm.params, g, 0.1)
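# The manual loop above ends with update(lstm.params, g, 0.1), which is not
# defined in this section. A minimal sketch, assuming params is an ordered
# mapping of Theano shared variables (lstm.params.values() is passed to
# theano.grad above) and g maps the same indices to accumulated gradients:
def update(params, g, lr):
    """In-place SGD step on Theano shared variables."""
    for i, p in enumerate(params.values()):
        p.set_value(p.get_value() - lr * g[i])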
reader_valid = Reader(valid_md)
reader_valid.word_dict = reader.word_dict
reader_valid.tag_dict = reader.tag_dict
reader_valid.codify_sentences()
codified_sentences_valid = [n([t.codified_word for t in s])
                            for s in reader_valid.sentences]
codified_tags_valid = [n([t.codified_tag for t in s])
                       for s in reader_valid.sentences]
x = T.ivector('x')
y = T.ivector('y')
mask = T.ivector('mask')
emb = Embedding(x, args.num_features, num_words + 1)
if args.dropout:
    dropout = Dropout(emb.output, args.num_features, args.dropout)
    lstm = LSTM(dropout.output, args.l2, args.hidden, num_words + 1,
                num_tags, args.num_features)
else:
    lstm = LSTM(emb.output, args.l2, args.hidden, num_words + 1,
                num_tags, args.num_features)
if args.load_models:
    print('... Loaded Models')
    emb.load(directory_model, varlist)
    lstm.load(directory_model, varlist)
te, nll = lstm.errors(y)
params = emb.params + lstm.params
params_helper = emb.params_helper + lstm.params_helper
rho = 10
lr = np.float32(float(args.learning_rate))
def load(state):
    lstm = LSTM.load(state['lstm'])
    output = Softmax.load(state['output'])
    return CharacterGenerator(lstm, output)
parser.add_argument("--decay_rate", type=float, default=0.95) parser.add_argument("--dropout", type=float, default=0.0) parser.add_argument("--epochs", type=int, default=1) parser.add_argument("--grad_clip", type=int, default=5) parser.add_argument("--init_from", type=str, default="") args = parser.parse_args() train_data, val_data = load_data() n_epochs = args.epochs n_units = args.lstm_size grad_clip = args.grad_clip # LSTMを初期化 model = LSTM(3136, n_units) if args.gpu >= 0: cuda.get_device(args.gpu).use() model.to_gpu() # 学習アルゴリズムのセットアップ optimizer = optimizers.RMSprop(lr=args.learning_rate, alpha=args.decay_rate, eps=1e-8) optimizer.setup(model.collect_parameters()) whole_len = len(train_data) whole_val_len = len(val_data) epoch = 0 start_at = time.time() cur_at = start_at end_time = 0 state = make_initial_state(n_units)
def __init__(self, imgX, imgY, input=None, n_hidden_enc=100, n_hidden_dec=100,
             n_z=100, n_steps=8, batch_size=100, rng=rng):
    # initialize parameters
    if input is None:
        input = theano.shared(numpy.zeros((batch_size, imgX * imgY)))
    self.c0 = theano.shared(name='c0',
                            value=numpy.random.uniform(-1.0, 1.0, (imgX * imgY))
                            .astype(theano.config.floatX))
    self.rnn_enc = LSTM(n_hidden_dec + 2 * imgX * imgY, n_hidden_enc)
    self.rnn_dec = LSTM(n_z, n_hidden_dec)
    self.Z = RandomVariable(rng, n_in=n_hidden_enc, n_out=n_z)
    self.readHead = ReadHead(n_hidden_enc)
    self.writeHead = WriteHead(imgX, imgY, n_hidden_dec)
    self.X = RandomVariable(rng, n_in=imgX * imgY, n_out=imgX * imgY,
                            sigmoid_mean=True)
    self.randSeq = rng.normal((n_steps, batch_size, n_z))
    self.params = ([self.c0] + self.readHead.params + self.rnn_enc.params
                   + self.Z.params + self.rnn_dec.params + self.X.params
                   + self.writeHead.params)

    # turns a vector into an n_batches x vector_length matrix;
    # the concatenate operation won't broadcast, so we add a 0 matrix with
    # the correct number of rows
    def vec2Matrix(v):
        t = v.dimshuffle(['x', 0])
        t = T.dot(input.dimshuffle([1, 0])[0].dimshuffle([0, 'x']), t)
        return v + T.zeros_like(t)

    def autoEncode(epsilon, ctm1, stm1_enc, htm1_enc, stm1_dec, htm1_dec,
                   ztm1, x):
        x_err = x - T.nnet.sigmoid(ctm1)
        rt = self.readHead.read(x, x_err, htm1_dec)
        [s_t_enc, h_t_enc] = self.rnn_enc.recurrence(
            T.concatenate([rt, htm1_dec], axis=1), stm1_enc, htm1_enc[-1])
        z_t = self.Z.conditional_sample(h_t_enc, epsilon)
        [s_t_dec, h_t_dec] = self.rnn_dec.recurrence(z_t, stm1_dec, htm1_dec)
        c_t = ctm1 + self.writeHead.write(h_t_dec)
        return [c_t, s_t_enc, htm1_enc + [h_t_enc], s_t_dec, htm1_dec,
                ztm1 + [z_t]]

    c_t, s_t_enc, h_t_enc, s_t_dec, h_t_dec, z_t = [
        vec2Matrix(self.c0), vec2Matrix(self.rnn_enc.s0),
        [vec2Matrix(self.rnn_enc.h0)], vec2Matrix(self.rnn_dec.s0),
        vec2Matrix(self.rnn_dec.h0), []]

    # would like to use scan here, but it runs into errors with computations
    # involving random variables and also takes much longer to build the
    # gradient graph
    for i in range(n_steps):
        c_t, s_t_enc, h_t_enc, s_t_dec, h_t_dec, z_t = autoEncode(
            self.randSeq[i], c_t, s_t_enc, h_t_enc, s_t_dec, h_t_dec, z_t,
            input)

    def generate(epsilon, ctm1, stm1_dec, htm1_dec):
        [s_t_dec, h_t_dec] = self.rnn_dec.recurrence(epsilon, stm1_dec,
                                                     htm1_dec)
        c_t = ctm1 + self.writeHead.write(h_t_dec)
        return [c_t, s_t_dec, h_t_dec]

    c_t2, s_t_dec2, h_t_dec2 = [vec2Matrix(self.c0),
                                vec2Matrix(self.rnn_dec.s0),
                                vec2Matrix(self.rnn_dec.h0)]
    for i in range(n_steps):
        c_t2, s_t_dec2, h_t_dec2 = generate(self.randSeq[i], c_t2, s_t_dec2,
                                            h_t_dec2)

    self.h_t_enc = T.stacklists(h_t_enc)
    self.cT = c_t
    self.lossX = T.sum(-self.X.log_conditional_prob(input, self.cT))
    self.lossZ = T.sum(self.Z.latent_loss(self.h_t_enc))
    self.loss = (self.lossX + self.lossZ) / batch_size
    # diff = (T.dot(self.cT, self.X.w_mean) - input)
    # var = T.exp(T.dot(self.cT, self.X.w_var))
    self.test = self.loss
    self.generated_x = self.X.conditional_sample(
        self.cT, rng.normal((batch_size, imgX * imgY)))
    self.generated_x2 = self.X.conditional_sample(
        c_t2, rng.normal((batch_size, imgX * imgY)))
    self.mean = T.dot(self.cT, self.X.w_mean)
    self.var = T.exp(T.dot(self.cT, self.X.w_var))
try:
    with open(os.path.join(directory_model, 'reader.pkl'), 'rb') as f:
        reader = pickle.load(f)
except Exception:
    md = Metadata(args, args.filename,
                  args.fixed_embeddings or args.learn_embeddings)
    reader = Reader(md, minimum_occurrence=2)
num_tags = len(reader.tag_dict)
num_words = len(reader.word_dict)

print('... loading models')
x = T.ivector('x')
emb = Embedding(x, args.num_features, num_words + 1)
lstm = LSTM(emb.output, args.l2, args.hidden, num_words + 1, num_tags,
            args.num_features)
emb.load(directory_model, varlist)
lstm.load(directory_model, varlist)
classify = th.function(inputs=[x], outputs=[lstm.y_pred, lstm.p_y_given_x])
print('#words: {}, #tags : {}, #hidden : {}, embedding size: {} '.format(
    len(reader.word_dict), len(reader.tag_dict), args.hidden,
    args.num_features))
print('>>> READY')
while True:
    sent = input()
    coded = reader.codify_string(sent)