def add_lstm(self, inputs, i, name, backward=False):
    prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units]) # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units * 2  ## concat after each layer
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units,
                               tf.shape(inputs)[1], self.hidden_prob)
    if backward:
        ## backward: reset states after zero paddings
        non_paddings = tf.transpose(self.weight, [1, 0])  ## [batch_size, seq_len] => [seq_len, batch_size]
        non_paddings = tf.reverse(non_paddings, [0])
        cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights, backward=backward),
                              [inputs, non_paddings], prev_init)
    else:
        cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
    ## cell_hidden: [seq_len, 2, batch_size, units]
    h = tf.unstack(cell_hidden, 2, axis=1)[1]  ## [seq_len, batch_size, units]
    return h

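# A minimal, self-contained sketch of the tf.scan pattern used above, assuming the
# standard tf.scan(fn, elems, initializer) signature: scan threads a stacked
# [2, batch, units] (c, h) state through time-major inputs, and the per-step
# callable returns the next state. toy_step is a hypothetical stand-in for the
# project's real `lstm` cell, not its actual gate arithmetic.
import tensorflow as tf

def toy_step(prev, x):
    # prev: [2, batch, units] stacked (c, h); x: [batch, units]
    c, h = tf.unstack(prev, 2, axis=0)
    h = tf.tanh(x + h)  # stand-in for the real gate computation
    return tf.stack([c, h])

seq = tf.random.normal([5, 3, 4])       # [seq_len, batch, units]
init = tf.zeros([2, 3, 4])              # [2, batch, units]
states = tf.scan(toy_step, seq, init)   # [seq_len, 2, batch, units]
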
def __init__(self, num_chars, compression_vector_size):
    super(cclstm, self).__init__()
    self.autoencoder = nn.nn([num_chars, 1000, 250, 100, 250, 1000, num_chars])
    self.charlstm = lstm.lstm([self.autoencoder.layers[3], compression_vector_size,
                               self.autoencoder.layers[3]], softmax=False)
    # integer division keeps the hidden layer sizes integral
    self.wordlstm = lstm.lstm([compression_vector_size,
                               compression_vector_size + compression_vector_size // 2,
                               compression_vector_size], softmax=False)
    self.sentlstm = lstm.lstm([self.wordlstm.layers[1],
                               self.wordlstm.layers[1] + self.wordlstm.layers[1] // 2,
                               self.wordlstm.layers[1]], softmax=False)
    self.responder = lstm.lstm([self.sentlstm.layers[1],
                                self.sentlstm.layers[1] + self.sentlstm.layers[1] // 2,
                                self.sentlstm.layers[1]], softmax=False)
    self.compression_vector_size = compression_vector_size
    self.response_vectors = []
    self.response_lookup = []

def main():
    prune_occurance_lt = 15
    cutoff = 100  # numpy.inf # 1000
    hidden_size = 100
    num_epochs = 5
    print("loading data...", end="", flush=True)
    train, dev, test = load_data()
    print("done.")
    print("pruning words with occurrences < %s..." % prune_occurance_lt, end="", flush=True)
    vocab, train, dev, test = prune_words(train, dev, test,
                                          prune_occurances_lt=prune_occurance_lt)
    vocab_size = len(vocab)
    train, dev, test = cutoff_data(train, dev, test, cutoff=cutoff)
    print("done. vocab size: %s" % len(vocab))
    print("vectorizing data with cutoff: %s..." % cutoff, end="", flush=True)
    # train_csr, dev_csr, test_csr = make_vectorized(train, dev, test, vocab)
    train_is, dev_is, test_is = convert_all_data_to_vocab_indices(train, dev, test, vocab)
    print("done.")
    print("Checking data formats...")
    """
    check_inverse_indices(train, dev, test, train_is, dev_is, test_is, vocab)
    X_train_csr, Y_train_csr = train_csr
    X_dev_csr, Y_dev_csr = dev_csr
    X_test_csr, Y_test_csr = test_csr
    """
    X_train_is, Y_train_is = VG(train_is[0], vocab_size), VG(train_is[1], vocab_size)
    X_dev_is, Y_dev_is = VG(dev_is[0], vocab_size), VG(dev_is[1], vocab_size)
    X_test_is, Y_test_is = VG(test_is[0], vocab_size), VG(test_is[1], vocab_size)
    """
    check_data_format(X_train_csr, X_train_is)
    check_data_format(Y_train_csr, Y_train_is)
    check_data_format(X_dev_csr, X_dev_is)
    check_data_format(Y_dev_csr, Y_dev_is)
    check_data_format(X_test_csr, X_test_is)
    check_data_format(Y_test_csr, Y_test_is)
    """
    print("done.")
    # print("training csr model for %s epoch(s)..." % num_epochs)
    # lm_csr = rnn.rnn(len(vocab), len(vocab), hidden_size=hidden_size, seed=10)
    # lm_csr.train(X_train_csr, Y_train_csr, verbose=2, epochs=num_epochs)
    print("training vg model for %s epoch(s)..." % num_epochs)
    lm_vg = lstm.lstm(len(vocab), len(vocab), hidden_size=hidden_size, seed=10)
    lm_vg.train(X_train_is, Y_train_is, verbose=2, epochs=num_epochs)
    # acc_csr = test_model(lm_csr, X_test_csr, Y_test_csr)
    acc_vg = test_model(lm_vg, X_test_is, Y_test_is)
    # print("csr acc: {0:.3f}".format(acc_csr))
    print("vg acc: {0:.3f}".format(acc_vg))

async def predict(
        market: Optional[str] = Query(
            'binance',
            title='The name of exchange',
            description='The name of crypto exchange such as binance, bitflyer ...'),
        symbol: Optional[str] = Query(
            'btcusdt',
            title='The name of crypto pairs',
            description='The name of crypto pairs such as btcusdt, ethusdt ...'),
        freq: Optional[int] = Query(
            7200,
            title='The frequency of cryptowatch data',
            description='Time frequency (second) of cryptowatch time series data. (e.g. 7200)')):
    res = {
        'market': market,
        'symbol': symbol,
        'frequency': freq,
        'predict': lstm(market, symbol, freq)
    }
    return res

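# Assuming this coroutine is registered on a FastAPI app (e.g. with
# @app.get('/predict')) and served via uvicorn on the default port, a request
# might look like:
#
#   curl 'http://localhost:8000/predict?market=binance&symbol=btcusdt&freq=7200'
#
# which echoes the query parameters back alongside the lstm() forecast.
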
def add_one_forward_mt(self, prev_list, x, lstm_weights_list, forward_inputs_tensor):
    ## compute one word in the forward direction
    ## no need to use x for now; x is just a dummy for getting a sequence loop
    prev_cell_hiddens = prev_list[0]
    prev_cell_hidden_list = tf.split(prev_cell_hiddens, self.opts.num_layers, axis=0)
    prev_predictions = prev_list[1]
    prev_embedding = self.add_stag_embedding(prev_predictions)  ## [batch_size, inputs_dim]
    h = prev_embedding * self.stag_dropout_mat
    #h = tf.concat([x, prev_embedding], 1)
    cell_hiddens = []
    for i in xrange(self.opts.num_layers):
        weights = lstm_weights_list[i]
        cell_hidden = lstm(prev_cell_hidden_list[i], h, weights)  ## [2, batch_size, units]
        cell_hiddens.append(cell_hidden)
        h = tf.unstack(cell_hidden, 2, axis=0)[1]  ## [batch_size, units]
    cell_hiddens = tf.concat(cell_hiddens, 0)
    projected_outputs = self.add_projection(h)  ## [batch_size, nb_tags]
    predictions = self.add_predictions(projected_outputs)  ## [batch_size]
    new_state = [cell_hiddens, predictions, projected_outputs]
    return new_state

def __init__(self, vocab_size, embedding_size_vocab, tag_size, embedding_size_tag,
             lstm_units, attention_units, dense_units):
    super(MANNModel, self).__init__()
    self.lstm = lstm(lstm_units=lstm_units)
    self.lstm_units = lstm_units
    self.TCA = TCA(attention_units=attention_units)
    self.embedding_vocab = tf.keras.layers.Embedding(
        name="embedding_vocab",
        input_dim=vocab_size,
        output_dim=embedding_size_vocab,
    )
    self.embedding_tag = tf.keras.layers.Embedding(
        name="embedding_tag",
        input_dim=tag_size,
        output_dim=embedding_size_tag,
    )
    self.dense1 = tf.keras.layers.Dense(
        name="dense_1",
        units=dense_units,
        activation=tf.keras.activations.relu,
        kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA),
    )
    self.dense2 = tf.keras.layers.Dense(
        name="dense_2",
        units=1,
        activation=tf.keras.activations.sigmoid,
        kernel_regularizer=tf.keras.regularizers.l2(REG_LAMBDA),
    )

def make_mean_encoder(num_layer, num_hidden, dropout=0., vocab_size=0, num_embed=0,
                      with_embedding=False):
    param_cells = []
    last_state = []
    for i in xrange(num_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('en_l%d_i2h_weight' % i),
                                     i2h_bias=mx.sym.Variable('en_l%d_i2h_bias' % i),
                                     h2h_weight=mx.sym.Variable('en_l%d_h2h_weight' % i),
                                     h2h_bias=mx.sym.Variable('en_l%d_h2h_bias' % i)))
        last_state.append(LSTMState(c=mx.sym.Variable('l%d_init_c' % i),
                                    h=mx.sym.Variable('l%d_init_h' % i)))
    assert len(last_state) == num_layer
    encoder_input = mx.sym.Variable('encoder_input')
    if with_embedding is True:
        assert vocab_size > 0 and num_embed > 0
        encoder_embed_weight = mx.sym.Variable('en_embed_weight')
        encoder_input = mx.sym.Embedding(data=encoder_input, input_dim=vocab_size,
                                         output_dim=num_embed, weight=encoder_embed_weight,
                                         name='encoder_embed')
    mean_input = mx.sym.sum(data=encoder_input, axis=1, keepdims=False)
    hidden = mean_input
    for i in xrange(num_layer):
        if i == 0:
            dp_ratio = 0
        else:
            dp_ratio = dropout
        next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=0, layeridx=i,
                          param=param_cells[i], prev_state=last_state[i], dropout=dp_ratio)
        hidden = next_state.h
        last_state[i] = next_state
    return last_state

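# The MXNet encoder/decoder builders in this collection all delegate the per-step
# recurrence to an `lstm` cell helper and the LSTMState/LSTMParam namedtuples.
# Below is a minimal sketch of such a cell in the style of the classic MXNet
# symbolic LSTM examples; the keyword names vary between callers here (data vs.
# indata), so treat this as an assumed reference implementation, not the
# project's actual one.
from collections import namedtuple
import mxnet as mx

LSTMState = namedtuple('LSTMState', ['c', 'h'])
LSTMParam = namedtuple('LSTMParam', ['i2h_weight', 'i2h_bias', 'h2h_weight', 'h2h_bias'])

def lstm(data, num_hidden, seqidx, layeridx, param, prev_state, dropout=0.):
    if dropout > 0.:
        data = mx.sym.Dropout(data=data, p=dropout)
    # One FullyConnected per direction produces all four gates at once.
    i2h = mx.sym.FullyConnected(data=data, weight=param.i2h_weight, bias=param.i2h_bias,
                                num_hidden=num_hidden * 4,
                                name='t%d_l%d_i2h' % (seqidx, layeridx))
    h2h = mx.sym.FullyConnected(data=prev_state.h, weight=param.h2h_weight, bias=param.h2h_bias,
                                num_hidden=num_hidden * 4,
                                name='t%d_l%d_h2h' % (seqidx, layeridx))
    gates = i2h + h2h
    sliced = mx.sym.SliceChannel(gates, num_outputs=4,
                                 name='t%d_l%d_slice' % (seqidx, layeridx))
    in_gate = mx.sym.Activation(sliced[0], act_type='sigmoid')
    in_transform = mx.sym.Activation(sliced[1], act_type='tanh')
    forget_gate = mx.sym.Activation(sliced[2], act_type='sigmoid')
    out_gate = mx.sym.Activation(sliced[3], act_type='sigmoid')
    next_c = (forget_gate * prev_state.c) + (in_gate * in_transform)
    next_h = out_gate * mx.sym.Activation(next_c, act_type='tanh')
    return LSTMState(c=next_c, h=next_h)
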
def test(config):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    model = lstm.lstm(config)
    train_dataset = get_dataset("test", config)
    checkpoint = tf.train.Checkpoint(model=model)
    # restore() requires a save path; use the latest checkpoint as elsewhere in this code
    checkpoint.restore(tf.train.latest_checkpoint("./output/"))

def eager_train(config):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    model = lstm.lstm(config)
    train_dataset = get_dataset("train", config)
    test_dataset = iter(get_dataset("dev", config))
    test_data_iterator = data_stream(test_dataset).iterator()
    optimizer = tf.keras.optimizers.Adam(learning_rate=config["learning_rate"])
    checkpoint = tf.train.Checkpoint(optimizer=optimizer, model=model)
    # load the pre-trained BERT embedding table
    embedding_table = tf.train.load_variable("./chinese_L-12_H-768_A-12",
                                             "bert/embeddings/word_embeddings")
    model.embedding_table.assign(embedding_table)
    step = 0
    writer = tf.summary.create_file_writer("tf.log")
    for batch_data in train_dataset:
        with tf.GradientTape() as tape:
            prob, loss = model.predict(batch_data)
        train_variables = model.trainable_variables
        gradients = tape.gradient(target=loss, sources=train_variables)
        optimizer.apply_gradients(zip(gradients, train_variables))
        step += 1
        if step % 1000 == 0:
            print(loss)
        if step % 20000 == 0:
            checkpoint.save("./output/lstm-%d" % step)
            test_data = next(test_dataset)
            _, test_loss = model.predict(test_data)
            with writer.as_default():
                tf.summary.scalar("loss", loss, step)
                tf.summary.scalar("test_loss", test_loss, step)
                writer.flush()
    checkpoint.save("./output/final")

def add_one_forward(self, prev_list, x, lstm_weights_list, backward_embeddings):
    ## compute one word in the forward direction
    prev_cell_hiddens = prev_list[0]
    prev_cell_hidden_list = tf.split(prev_cell_hiddens, self.opts.num_layers, axis=0)
    prev_predictions = prev_list[1]
    time_step = prev_list[2]
    prev_embedding = self.add_stag_embedding(prev_predictions)  ## [batch_size, inputs_dim]
    prev_embedding = prev_embedding * self.stag_dropout_mat
    h = tf.concat([x, prev_embedding], 1)
    cell_hiddens = []
    for i in xrange(self.opts.num_layers):
        weights = lstm_weights_list[i]
        cell_hidden = lstm(prev_cell_hidden_list[i], h, weights)  ## [2, batch_size, units]
        cell_hiddens.append(cell_hidden)
        h = tf.unstack(cell_hidden, 2, axis=0)[1]  ## [batch_size, units]
        h = h * self.lstm_dropout_mats[i]
    cell_hiddens = tf.concat(cell_hiddens, 0)
    with tf.device('/cpu:0'):
        backward_h = tf.nn.embedding_lookup(backward_embeddings, time_step)  ## [batch_size, units]
    bi_h = tf.concat([h, backward_h], 1)  ## [batch_size, outputs_dim]
    projected_outputs = self.add_projection(bi_h)  ## [batch_size, nb_tags]
    predictions = self.add_predictions(projected_outputs)  ## [batch_size]
    time_step += 1
    new_state = [cell_hiddens, predictions, time_step, projected_outputs]
    return new_state

def load_nlu_model(self, model_path):
    """ load the trained NLU model """
    model_params = pickle.load(open(model_path, 'rb'))
    hidden_size = model_params['model']['Wd'].shape[0]
    output_size = model_params['model']['Wd'].shape[1]
    if model_params['params']['model'] == 'lstm':  # lstm
        input_size = model_params['model']['WLSTM'].shape[0] - hidden_size - 1
        rnnmodel = lstm(input_size, hidden_size, output_size)
    elif model_params['params']['model'] == 'bi_lstm':  # bi_lstm
        input_size = model_params['model']['WLSTM'].shape[0] - hidden_size - 1
        rnnmodel = biLSTM(input_size, hidden_size, output_size)
    rnnmodel.model = copy.deepcopy(model_params['model'])
    self.model = rnnmodel
    self.word_dict = copy.deepcopy(model_params['word_dict'])
    self.slot_dict = copy.deepcopy(model_params['slot_dict'])
    self.act_dict = copy.deepcopy(model_params['act_dict'])
    self.tag_set = copy.deepcopy(model_params['tag_set'])
    self.params = copy.deepcopy(model_params['params'])
    self.inverse_tag_dict = {self.tag_set[k]: k for k in self.tag_set.keys()}

def lstm_unroll(num_lstm_layer, seq_len, input_size, num_hidden, num_embed, num_label, dropout=0.):
    embed_weight = mx.sym.Variable("embed_weight")
    cls_weight = mx.sym.Variable("cls_weight")
    cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                     i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                     h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                     h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
        state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                          h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert len(last_states) == num_lstm_layer

    loss_all = []
    for seqidx in range(seq_len):
        # embedding layer
        data = mx.sym.Variable("data/%d" % seqidx)
        hidden = mx.sym.Embedding(data=data, weight=embed_weight,
                                  input_dim=input_size, output_dim=num_embed,
                                  name="t%d_embed" % seqidx)
        # stack LSTM
        for i in range(num_lstm_layer):
            if i == 0:
                dp = 0.
            else:
                dp = dropout
            next_state = lstm(num_hidden, indata=hidden, prev_state=last_states[i],
                              param=param_cells[i], seqidx=seqidx, layeridx=i, dropout=dp)
            hidden = next_state.h
            last_states[i] = next_state
        # decoder
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        fc = mx.sym.FullyConnected(data=hidden, weight=cls_weight, bias=cls_bias,
                                   num_hidden=num_label)
        sm = mx.sym.SoftmaxOutput(data=fc, label=mx.sym.Variable('label/%d' % seqidx),
                                  name='t%d_sm' % seqidx)
        loss_all.append(sm)

    # for i in range(num_lstm_layer):
    #     state = last_states[i]
    #     state = LSTMState(c=mx.sym.BlockGrad(state.c, name="l%d_last_c" % i),
    #                       h=mx.sym.BlockGrad(state.h, name="l%d_last_h" % i))
    #     last_states[i] = state
    #
    # unpack_c = [state.c for state in last_states]
    # unpack_h = [state.h for state in last_states]
    #
    # return mx.sym.Group(loss_all + unpack_c + unpack_h)
    return mx.sym.Group(loss_all)

def forward(self, x_data, weight, bias, hiddenSize, miniBatch, seqLength, numLayers):
    out = lstm()(x_data, weight, bias, hiddenSize, miniBatch, seqLength, numLayers)
    # reshape using the method's own arguments
    out = out.view(miniBatch, hiddenSize).cuda()
    out = self.fc(out)
    return out

def __init__(self, g_dims, content_encoder, batch_size, rnn_size=256, rnn_layers=1):
    super().__init__()
    self.content_encoder = content_encoder
    self.content_rnn = lstm(g_dims, g_dims, rnn_size, rnn_layers, batch_size)

def processByLSTM(data, scope_name, time_step, file_name):
    train_x, train_y, test_x = formData(data, time_step)
    l = lstm()
    if time_step != 160:
        l.train_lstm(train_x, train_y, scope_name, time_step, file_name)
    answer = l.prediction(train_x, scope_name, time_step, file_name)
    # error = computeError(answer, train_y)
    return answer

def predict():
    if request.method == 'GET':
        query_parameters = request.args
        if query_parameters:
            symbol = query_parameters.get('symbol')
            result = {'symbol': symbol, 'predict': lstm(symbol)}
            return jsonify(result)
        else:
            return 'Select Symbol like /api/predict?symbol=AMD'
    else:
        return 'Do GET action!!'

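# Assuming this view is routed as @app.route('/api/predict', methods=['GET']) on a
# Flask app running on the default port, a request and response might look like:
#
#   curl 'http://localhost:5000/api/predict?symbol=AMD'
#   {"predict": ..., "symbol": "AMD"}
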
def make_sequence_decoder(init_state, seqlen, num_layer, num_hidden, num_label,
                          dropout=0., vocab_size=0, num_embed=0, with_embedding=False):
    fc_weight = mx.sym.Variable('de_fc_weight')
    fc_bias = mx.sym.Variable('de_fc_bias')
    param_cells = []
    last_state = init_state
    for i in xrange(num_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('de_l%d_i2h_weight' % i),
                                     i2h_bias=mx.sym.Variable('de_l%d_i2h_bias' % i),
                                     h2h_weight=mx.sym.Variable('de_l%d_h2h_weight' % i),
                                     h2h_bias=mx.sym.Variable('de_l%d_h2h_bias' % i)))
    assert len(last_state) == num_layer
    decoder_input = mx.sym.Variable('decoder_input')
    decoder_output = mx.sym.Variable('decoder_output')
    if with_embedding is True:
        assert vocab_size > 0 and num_embed > 0
        decoder_embed_weight = mx.sym.Variable('de_embed_weight')
        decoder_input = mx.sym.Embedding(data=decoder_input, input_dim=vocab_size,
                                         output_dim=num_embed, weight=decoder_embed_weight,
                                         name='decoder_embed')
    slice_input = mx.sym.SliceChannel(data=decoder_input, num_outputs=seqlen,
                                      axis=1, squeeze_axis=1)
    hidden_all = []
    for seqidx in xrange(seqlen):
        hidden = slice_input[seqidx]
        for i in xrange(num_layer):
            if i == 0:
                dp_ratio = 0
            else:
                dp_ratio = dropout
            next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=seqidx, layeridx=i,
                              param=param_cells[i], prev_state=last_state[i], dropout=dp_ratio)
            hidden = next_state.h
            last_state[i] = next_state
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        hidden_all.append(hidden)
    hidden_concat = mx.sym.Concat(*hidden_all, dim=0)
    fc = mx.sym.FullyConnected(data=hidden_concat, num_hidden=num_label,
                               weight=fc_weight, bias=fc_bias, name='fc')
    decoder_output = mx.sym.transpose(data=decoder_output)
    decoder_output = mx.sym.Reshape(data=decoder_output, shape=(-1,))
    sm = mx.sym.SoftmaxOutput(data=fc, label=decoder_output, name='decoder_softmax')
    return sm

def crnn_lstm(network, per_batch_size):
    # input
    data = mx.sym.Variable('data')
    label = mx.sym.Variable('label')
    net, _ = get_conv_feat(data)  # b, c, h, w
    hidden_concat = lstm(net, num_lstm_layer=config.num_lstm_layer,
                         num_hidden=config.num_hidden, seq_length=config.seq_length)
    # mx.sym.transpose(net, [])
    pred = mx.sym.FullyConnected(data=hidden_concat,
                                 num_hidden=config.num_classes)  # (bz x 25) x num_classes
    label = mx.sym.Reshape(data=label, shape=(-1,))
    label = mx.sym.Cast(data=label, dtype='int32')
    return mx.sym.WarpCTC(data=pred, label=label, label_length=config.num_label,
                          input_length=config.seq_length)

def main():
    X_Vec = np.load('tokened_text.npy', allow_pickle=True)
    y = np.load('y.npy', allow_pickle=True)
    print("Finished loading data................")
    print("Building word vectors................")
    input_dim, embedding_weights, w2dic = word2vec_train(X_Vec)
    print("Finished building word vectors................")
    index = data2inx(w2dic, X_Vec)
    index2 = sequence.pad_sequences(index, maxlen=voc_dim)
    x_train, x_test, y_train, y_test = train_test_split(index2, y, test_size=0.2, random_state=1)
    y_train = keras.utils.to_categorical(y_train, num_classes=7)
    y_test = keras.utils.to_categorical(y_test, num_classes=7)
    model = lstm(input_dim, embedding_weights)
    train_lstm(model, x_train, y_train, x_test, y_test)

def predict(config):
    physical_devices = tf.config.experimental.list_physical_devices('GPU')
    tf.config.experimental.set_memory_growth(physical_devices[0], True)
    model = lstm.lstm(config)
    dataset = get_dataset("test", config)
    optimizer = tf.keras.optimizers.Adam(learning_rate=config["learning_rate"])
    checkpoint = tf.train.Checkpoint(model=model, optimizer=optimizer)
    status = checkpoint.restore(tf.train.latest_checkpoint("./output/"))
    status.assert_existing_objects_matched()
    writer = open('result', 'w')
    for item in dataset:
        prob, _ = model.predict(item, False)
        res = tf.math.argmax(prob, -1)
        for label in list(res.numpy()):
            writer.write(str(list(label)) + "\n")
    writer.close()

def forward(self, x_data, weight, bias, hiddenSize, miniBatch, seqLength, numLayers):
    # start_t = time.time()
    out = lstm()(x_data, weight, bias, hiddenSize, miniBatch, seqLength, numLayers)
    # elapsed_t = time.time() - start_t
    # print("Module wrapper time:\t%f seconds" % (elapsed_t))
    # reshape using the method's own arguments
    out = out.view(miniBatch, hiddenSize).cuda()
    # print(out)
    out = self.fc(out)
    # print(np.shape(out))
    return out

def __init__(self, data, epochs, batch_size, training_ratio, sequance_length,
             lstmCells=10, LSTMDL1units=20, LSTMDL2units=5, LSTMDL3units=1,
             CL1filters=1, CL1kernal_size=2, CL1strides=1, PL1pool_size=1,
             CNNDL1units=20, CNNDL2units=5, CNNDL3units=1,
             lstmWeight=0.5, cnnWeight=0.5, learningRate=0.001):
    self.lstm_model = lstm.lstm(data=data, epochs=epochs, batch_size=batch_size,
                                training_ratio=training_ratio,
                                sequance_length=sequance_length,
                                lstmCells=lstmCells, learningRate=learningRate)
    self.cnn_model = cnn.cnn(data=data, epochs=epochs, batch_size=batch_size,
                             training_ratio=training_ratio,
                             sequance_length=sequance_length,
                             CL1filters=CL1filters, CL1kernal_size=CL1kernal_size,
                             CL1strides=CL1strides, PL1pool_size=PL1pool_size,
                             DL1units=CNNDL1units, DL2units=CNNDL2units,
                             DL3units=CNNDL3units, learningRate=learningRate)
    self.lstmWeight = lstmWeight
    self.cnnWeight = cnnWeight

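# The lstmWeight/cnnWeight pair suggests a weighted blend of the two models'
# forecasts. A minimal sketch of how such a blend could be computed, assuming both
# models produce forecasts of the same shape; combine_predictions is a
# hypothetical helper, not part of the original class:
import numpy as np

def combine_predictions(lstm_pred, cnn_pred, lstm_weight=0.5, cnn_weight=0.5):
    # Convex combination of the two forecasts, elementwise.
    return lstm_weight * np.asarray(lstm_pred) + cnn_weight * np.asarray(cnn_pred)
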
def train_lstm_experiment(theme):
    Syn_aug = True  # if False, runs faster but does slightly worse on the test dataset
    save_model = True
    model_name = "negative5000_negscore" + str(negative_sampling.negative_score) + \
                 "genamount" + str(negative_sampling.new_examples_amout) + "_model"
    epoch_num_pre_training = 66
    epoch_num_training = 350
    print model_name
    print
    print "epoch num pre-training: " + str(epoch_num_pre_training)
    print "epoch num training: " + str(epoch_num_training)
    sls = lstm(training=True)
    test = pickle.load(open(data_folder + "semtest.p", 'rb'))
    train = pickle.load(open(data_folder + "stsallrmf.p", "rb"))
    print "Loading pre-training model"
    sls.train_lstm(train, epoch_num_pre_training)
    print "Pre-training done"
    train = pickle.load(open(data_folder + "semtrain.p", 'rb'))
    if Syn_aug:
        # print "Train with negative sampling"
        # train_enriched = extend_negative_samples(train)
        print "Train with positive sampling"
        train_enriched = expand_positive_examples(train, ignore_flag=True)
        sls.train_lstm(train_enriched, epoch_num_training, eval_data=test, disp_freq=25)
    else:
        print "Train normally"
        sls.train_lstm(train, epoch_num_training, eval_data=test)
    print sls.check_error(test)
    if save_model:
        sls.save_to_pickle(cn.tmp_expr_foldpath + "/" + model_name + ".p")
    # Example
    sa = "A truly wise man"
    sb = "He is smart"
    print sls.predict_similarity(sa, sb) * 4.0 + 1.0
    return theme + model_name

def get_model(self, model_type, parameters):
    if model_type == "multi_arma":
        return arma(parameters=parameters)
    elif model_type == "multi_arima":
        return arima(parameters=parameters)
    elif model_type == "multi_lstm":
        return lstm(parameters=parameters)
    elif model_type == "multi_cnn":
        return cnn(parameters=parameters)
    elif model_type == "multi_lstmcnn":
        return lstmcnn(parameters=parameters)
    elif model_type == "multi_lstmcnn_kerascombinantion":
        return lstmcnn_kerascombinantion(parameters=parameters)

def __init__(self, args):
    args.deterministic = True
    encoder_checkpoint = torch.load(args.encoder_checkpoint)
    if args.dataset in ["mnist", "dsprites"]:
        Ec = models.content_encoder(args.g_dims, nc=args.num_channels).cuda()
        Ep = models.pose_encoder(args.z_dims, nc=args.num_channels).cuda()
    else:
        Ec = vgg_64.encoder(args.g_dims, nc=args.num_channels).cuda()
        Ep = resnet_64.pose_encoder(args.z_dims, nc=args.num_channels).cuda()
    if args.dataset == "mpi3d_real":
        D = vgg_64.drnet_decoder(args.g_dims, args.z_dims, nc=args.num_channels).cuda()
    else:
        D = models.decoder(args.g_dims, args.z_dims, nc=args.num_channels,
                           skips=args.skips).cuda()
    Ep.load_state_dict(encoder_checkpoint["position_encoder"])
    Ec.load_state_dict(encoder_checkpoint["content_encoder"])
    D.load_state_dict(encoder_checkpoint["decoder"])
    self.Ep = nn.DataParallel(Ep)
    self.Ec = nn.DataParallel(Ec)
    self.D = nn.DataParallel(D)
    self.Ep.train()
    self.Ec.train()
    self.D.train()
    lstm_model = lstm(args.g_dims + args.z_dims, args.z_dims, args.rnn_size,
                      args.rnn_layers, args.batch_size).cuda()
    nets = {"lstm": lstm_model}
    lstm_optim = torch.optim.Adam(lstm_model.parameters(), lr=args.lr,
                                  betas=(args.beta1, 0.999))
    optims = {"lstm_optim": lstm_optim}
    super().__init__(nets, optims, args)

def add_lstm(self, inputs, i, name):
    ## need to access c
    prev_init = tf.zeros([2, tf.shape(inputs)[1], self.opts.units])  # [2, batch_size, num_units]
    #prev_init = tf.zeros([2, 100, self.opts.units]) # [2, batch_size, num_units]
    if i == 0:
        inputs_dim = self.inputs_dim
    else:
        inputs_dim = self.opts.units
    weights = get_lstm_weights('{}_LSTM_layer{}'.format(name, i), inputs_dim, self.opts.units,
                               tf.shape(inputs)[1], self.hidden_prob)
    cell_hidden = tf.scan(lambda prev, x: lstm(prev, x, weights), inputs, prev_init)
    ## cell_hidden: [seq_len, 2, batch_size, units]
    c = tf.unstack(cell_hidden, 2, axis=1)[0]  ## [seq_len, batch_size, units]
    h = tf.unstack(cell_hidden, 2, axis=1)[1]  ## [seq_len, batch_size, units]
    return c, h

def LSTMStudentForSequenceClassification(input_ids_blob, input_mask_blob, token_type_ids_blob,
                                         label_blob, vocab_size, seq_length=512,
                                         hidden_size=300, intermediate_size=400,
                                         num_hidden_layers=1, hidden_dropout_prob=0.5,
                                         initializer_range=0.25, label_num=2,
                                         is_student=True, is_train=True):
    with flow.scope.namespace('student'):
        with flow.scope.namespace("embeddings"):
            (embedding_output_, embedding_table_) = _EmbeddingLookup(
                input_ids_blob=input_ids_blob,
                vocab_size=vocab_size + 1,
                embedding_size=hidden_size,
                word_embedding_name="word_embeddings",
                is_train=is_train)
        with flow.scope.namespace('lstm'):
            output = lstm(embedding_output_, hidden_size, return_sequence=False,
                          is_train=is_train)
        output = flow.layers.dense(
            inputs=output,
            units=intermediate_size,
            activation=flow.nn.relu,
            kernel_initializer=CreateInitializer(initializer_range),
            trainable=is_train,
            name='FC1')
        output = _Dropout(output, hidden_dropout_prob)
        logit_blob = flow.layers.dense(
            inputs=output,
            units=label_num,
            kernel_initializer=CreateInitializer(initializer_range),
            trainable=is_train,
            name='FC2')
    return logit_blob

def __init__(self, parameters):
    lstm_parmeters = {
        # for all
        "data": parameters["data"],
        "training_ratio": parameters["training_ratio"],
        "no_of_prediction_points": parameters["no_of_prediction_points"],
        # for LSTM
        "epochs": parameters["epochs"],
        "batch_size": parameters["batch_size"],
        "sequance_length": parameters["sequance_length"],
        "DL1units": parameters["LSTMDL1units"],
        "DL2units": parameters["LSTMDL2units"],
        "DL3units": parameters["LSTMDL3units"],
        "lstmCells": parameters["lstmCells"],
    }
    self.lstm_model = lstm(lstm_parmeters)
    cnn_parmeters = {
        # for all
        "data": parameters["data"],
        "training_ratio": parameters["training_ratio"],
        "no_of_prediction_points": parameters["no_of_prediction_points"],
        # for CNN
        "epochs": parameters["epochs"],
        "batch_size": parameters["batch_size"],
        "sequance_length": parameters["sequance_length"],
        "DL1units": parameters["CNNDL1units"],
        "DL2units": parameters["CNNDL2units"],
        "DL3units": parameters["CNNDL3units"],
        "CL1filters": parameters["CL1filters"],
        "CL1kernal_size": parameters["CL1kernal_size"],
        "CL1strides": parameters["CL1strides"],
        "PL1pool_size": parameters["PL1pool_size"],
    }
    self.cnn_model = cnn(cnn_parmeters)
    self.lstmWeight = parameters["lstmWeight"]
    self.cnnWeight = parameters["cnnWeight"]
    self.no_of_prediction_points = parameters["no_of_prediction_points"]

def add_one_beam_forward(self, prev_list, x, lstm_weights_list, backward_embeddings,
                         beam_size, batch_size, post_first=False):
    ## compute one word in the forward direction
    prev_cell_hiddens = prev_list[0]  ## [2, batch_size, units*num_layers]
    prev_cell_hidden_list = tf.split(prev_cell_hiddens, self.opts.num_layers, axis=2)  ## [[2, batch_size, units] x num_layers]
    prev_predictions = prev_list[1]  ## [batch_size]
    time_step = prev_list[2]  ## 0D
    prev_scores = prev_list[3]  ## [batch_size (self.batch_size*beam_size), 1]
    prev_embedding = self.add_stag_embedding(prev_predictions)  ## [batch_size, inputs_dim]
    #prev_embedding = prev_embedding*self.stag_dropout_mat
    h = tf.concat([x, prev_embedding], 1)  ## [batch_size, inputs_dim + lm]
    cell_hiddens = []
    for i in xrange(self.opts.num_layers):
        weights = lstm_weights_list[i]
        cell_hidden = lstm(prev_cell_hidden_list[i], h, weights, post_first)  ## [2, batch_size, units]
        cell_hiddens.append(cell_hidden)
        h = tf.unstack(cell_hidden, 2, axis=0)[1]  ## [batch_size, units]
    cell_hiddens = tf.concat(cell_hiddens, 2)  ## [2, batch_size, units*num_layers]
    with tf.device('/cpu:0'):
        backward_h = tf.nn.embedding_lookup(backward_embeddings, time_step)  ## [self.batch_size, units]
    if post_first:
        ## batch_size = self.batch_size*beam_size
        backward_h = tf.reshape(tf.tile(backward_h, [1, beam_size]), [batch_size, -1])  ## [batch_size, units]
    bi_h = tf.concat([h, backward_h], 1)  ## [batch_size, outputs_dim]
    projected_outputs = self.add_projection(bi_h, post_first)  ## [batch_size, nb_tags]
    scores, indices = self.add_top_k(projected_outputs, prev_scores, beam_size, post_first)  ## [self.batch_size, beam_size] each
    scores = tf.stop_gradient(scores)
    indices = tf.stop_gradient(indices)
    predictions = indices % self.loader.nb_tags  ## [b, B]
    scores = tf.reshape(scores, [-1, 1])  ## [batch_size, 1]
    predictions = tf.reshape(predictions, [-1])  ## [batch_size]
    if post_first:
        parent_indices = tf.reshape(tf.range(0, batch_size, beam_size), [-1, 1]) \
            + indices // self.loader.nb_tags  ## [self.batch_size, 1] + [self.batch_size, beam_size]
        parent_indices = tf.reshape(parent_indices, [-1])  ## [self.batch_size*beam_size (batch_size)]
        cell_hiddens = tf.transpose(cell_hiddens, [1, 0, 2])  ## [batch_size, 2, units*num_layers]
        with tf.device('/cpu:0'):
            cell_hiddens = tf.nn.embedding_lookup(cell_hiddens, parent_indices)  ## [batch_size, 2, units*num_layers]
        cell_hiddens = tf.transpose(cell_hiddens, [1, 0, 2])  ## [2, batch_size, units*num_layers]
    else:
        ## Dummy parent indices for the first iteration; the parents are known then.
        parent_indices = tf.zeros([batch_size * beam_size], tf.int32)
        cell_hiddens = tf.reshape(tf.tile(cell_hiddens, [1, 1, beam_size]),
                                  [2, batch_size * beam_size, -1])
    time_step += 1
    new_state = [cell_hiddens, predictions, time_step, scores, parent_indices]
    return new_state

def main():
    for line in open('dataset_www2018_rand.txt'):
        line = line.strip().split('\t')
        hashtag = line[0]
        active_period = int(line[1])
        htg_acpd_dict[hashtag] = active_period
    hashtag_trainingset = get_trainingset_hashtag()
    print '!!!\nhashtags of the trainingset have been picked out.\n!!!'
    target = tf.placeholder(tf.float32, [None, num_classes])
    lstm_data = tf.placeholder(tf.float32, [None, lstm_max_length, num_lstm_features])
    lstm_sequence_length_vector = tf.placeholder(tf.int32, [None])
    lstm_dropout = tf.placeholder(tf.float32)
    lstm_output = lstm.lstm(lstm_data, lstm_sequence_length_vector, lstm_dropout)
    cnn_data = tf.placeholder(tf.float32, [None, None])
    reshape_cnn_data = tf.reshape(cnn_data, [-1, 1, 100, 1])
    cnn_output = cnn.cnn(reshape_cnn_data)
    # get the length-and-count embeddings
    length_and_count_embeddings = get_length_and_count_embeddings()
    prediction_output = overall_prediction(lstm_output, cnn_output, length_and_count_embeddings)
    cost_output = cost(prediction_output, target)
    optimizer_output = optimizer(cost_output)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    for epoch in range(num_epochs):
        np.random.shuffle(hashtag_trainingset)
        mse_sum = 0
        for i in range(num_iterations):

def make_sequence_decoder_inference(num_layer, num_hidden, num_label, dropout=0.,
                                    vocab_size=0, num_embed=0, with_embedding=False):
    fc_weight = mx.sym.Variable('de_fc_weight')
    fc_bias = mx.sym.Variable('de_fc_bias')
    param_cells = []
    last_state = []
    for i in xrange(num_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('de_l%d_i2h_weight' % i),
                                     i2h_bias=mx.sym.Variable('de_l%d_i2h_bias' % i),
                                     h2h_weight=mx.sym.Variable('de_l%d_h2h_weight' % i),
                                     h2h_bias=mx.sym.Variable('de_l%d_h2h_bias' % i)))
        last_state.append(LSTMState(c=mx.sym.Variable('l%d_init_c' % i),
                                    h=mx.sym.Variable('l%d_init_h' % i)))
    assert len(last_state) == num_layer
    decoder_input = mx.sym.Variable('decoder_input')
    if with_embedding is True:
        assert vocab_size > 0 and num_embed > 0
        decoder_embed_weight = mx.sym.Variable('de_embed_weight')
        decoder_input = mx.sym.Embedding(data=decoder_input, input_dim=vocab_size,
                                         output_dim=num_embed, weight=decoder_embed_weight,
                                         name='decoder_embed')
    hidden = decoder_input
    for i in xrange(num_layer):
        if i == 0:
            dp_ratio = 0
        else:
            dp_ratio = dropout
        next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=0, layeridx=i,
                          param=param_cells[i], prev_state=last_state[i], dropout=dp_ratio)
        hidden = next_state.h
        last_state[i] = next_state
    if dropout > 0:
        hidden = mx.sym.Dropout(data=hidden, p=dropout)
    fc = mx.sym.FullyConnected(data=hidden, num_hidden=num_label,
                               weight=fc_weight, bias=fc_bias, name='fc')
    sm = mx.sym.SoftmaxActivation(data=fc, name='decoder_softmax')
    output = [sm]
    for state in last_state:
        output.append(state.c)
        output.append(state.h)
    return mx.sym.Group(output)

def make_sequence_encoder_inference(seqlen, num_layer, num_hidden, dropout=0.,
                                    vocab_size=0, num_embed=0, with_embedding=False):
    param_cells = []
    last_state = []
    for i in xrange(num_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('en_l%d_i2h_weight' % i),
                                     i2h_bias=mx.sym.Variable('en_l%d_i2h_bias' % i),
                                     h2h_weight=mx.sym.Variable('en_l%d_h2h_weight' % i),
                                     h2h_bias=mx.sym.Variable('en_l%d_h2h_bias' % i)))
        last_state.append(LSTMState(c=mx.sym.Variable('l%d_init_c' % i),
                                    h=mx.sym.Variable('l%d_init_h' % i)))
    assert len(last_state) == num_layer
    encoder_input = mx.sym.Variable('encoder_input')
    if with_embedding is True:
        assert vocab_size > 0 and num_embed > 0
        encoder_embed_weight = mx.sym.Variable('en_embed_weight')
        encoder_input = mx.sym.Embedding(data=encoder_input, input_dim=vocab_size,
                                         output_dim=num_embed, weight=encoder_embed_weight,
                                         name='encoder_embed')
    slice_input = mx.sym.SliceChannel(data=encoder_input, num_outputs=seqlen,
                                      axis=1, squeeze_axis=1)
    for seqidx in xrange(seqlen):
        hidden = slice_input[seqidx]
        for i in xrange(num_layer):
            if i == 0:
                dp_ratio = 0
            else:
                dp_ratio = dropout
            next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=seqidx, layeridx=i,
                              param=param_cells[i], prev_state=last_state[i], dropout=dp_ratio)
            hidden = next_state.h
            last_state[i] = next_state
    output = []
    for state in last_state:
        output.append(state.c)
        output.append(state.h)
    return mx.sym.Group(output)

def __init__(self, input_dims, lstm_dims, fc_dims_end):
    # fc_dims_end excludes the final output layer of size 10
    last_dims = input_dims
    self.lstm_layers = []
    self.fc_layers = []
    self.trainable_variables = []
    count = 0
    for dims in lstm_dims:
        with tf.compat.v1.variable_scope('lstm_layer_' + str(count)):
            self.lstm_layers.append(lstm.lstm(last_dims, dims))
            self.trainable_variables += self.lstm_layers[-1].trainable_variables
            last_dims = dims
            count += 1
    for dims in fc_dims_end:
        with tf.compat.v1.variable_scope('fc_layer_' + str(count)):
            self.fc_layers.append(fc.fc(last_dims, dims))
            self.trainable_variables += self.fc_layers[-1].trainable_variables
            last_dims = dims
            count += 1
    self.output = fc.fc(last_dims, 10)

def get_model(is_training, params):
    if params["model"] == "lstm":
        model = lstm(is_training, params)
        return model

def make_bisequence_encoder(seqlen, num_layer, num_hidden, dropout=0.,
                            vocab_size=0, num_embed=0, with_embedding=False):
    forward_param_cells = []
    forward_last_state = []
    backward_param_cells = []
    backward_last_state = []
    for i in xrange(num_layer):
        forward_param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('fwd_en_l%d_i2h_weight' % i),
                                             i2h_bias=mx.sym.Variable('fwd_en_l%d_i2h_bias' % i),
                                             h2h_weight=mx.sym.Variable('fwd_en_l%d_h2h_weight' % i),
                                             h2h_bias=mx.sym.Variable('fwd_en_l%d_h2h_bias' % i)))
        forward_last_state.append(LSTMState(c=mx.sym.Variable('fwd_l%d_init_c' % i),
                                            h=mx.sym.Variable('fwd_l%d_init_h' % i)))
        backward_param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable('bwd_en_l%d_i2h_weight' % i),
                                              i2h_bias=mx.sym.Variable('bwd_en_l%d_i2h_bias' % i),
                                              h2h_weight=mx.sym.Variable('bwd_en_l%d_h2h_weight' % i),
                                              h2h_bias=mx.sym.Variable('bwd_en_l%d_h2h_bias' % i)))
        backward_last_state.append(LSTMState(c=mx.sym.Variable('bwd_l%d_init_c' % i),
                                             h=mx.sym.Variable('bwd_l%d_init_h' % i)))
    assert len(forward_last_state) == num_layer == len(backward_last_state)
    encoder_input = mx.sym.Variable('encoder_input')
    if with_embedding is True:
        assert vocab_size > 0 and num_embed > 0
        encoder_embed_weight = mx.sym.Variable('en_embed_weight')
        encoder_input = mx.sym.Embedding(data=encoder_input, input_dim=vocab_size,
                                         output_dim=num_embed, weight=encoder_embed_weight,
                                         name='encoder_embed')
    slice_input = mx.sym.SliceChannel(data=encoder_input, num_outputs=seqlen,
                                      axis=1, squeeze_axis=1)
    for seqidx in xrange(seqlen):
        hidden = slice_input[seqidx]
        for i in xrange(num_layer):
            if i == 0:
                dp_ratio = 0
            else:
                dp_ratio = dropout
            next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=seqidx, layeridx=i,
                              param=forward_param_cells[i], prev_state=forward_last_state[i],
                              dropout=dp_ratio)
            hidden = next_state.h
            forward_last_state[i] = next_state
    for seqidx in xrange(seqlen - 1, -1, -1):
        hidden = slice_input[seqidx]
        for i in xrange(num_layer):
            if i == 0:
                dp_ratio = 0
            else:
                dp_ratio = dropout
            next_state = lstm(data=hidden, num_hidden=num_hidden, seqidx=seqidx, layeridx=i,
                              param=backward_param_cells[i], prev_state=backward_last_state[i],
                              dropout=dp_ratio)
            hidden = next_state.h
            backward_last_state[i] = next_state
    last_state = []
    for i in xrange(num_layer):
        fwd_state = forward_last_state[i]
        bwd_state = backward_last_state[i]
        combine_c = fwd_state.c + bwd_state.c
        combine_h = fwd_state.h + bwd_state.h
        last_state.append(LSTMState(c=combine_c, h=combine_h))
    return last_state

from lstm import lstm
import pickle
from util_files.Constants import data_folder, models_folder

model_name = "bestsem.p"
sls = lstm(models_folder + model_name, load=True)
print model_name
test = pickle.load(open(data_folder + "semtest.p", 'rb'))
print sls.check_error(test)  # Mean Squared Error, Pearson, Spearman

import numpy as np
import lstm
import time

c = lstm.lstm()  # initializes the weights and the inputs randomly

input_size = 784
hidden_size = 256
batch_size = 64

W_bi = np.random.random((hidden_size, input_size)).astype(np.float32)
U_bi = np.random.random((hidden_size, hidden_size)).astype(np.float32)
b_bi = np.random.random((hidden_size, 1)).astype(np.float32)
# Remove the 1 from the 4 biases to trigger the code generation bug.
W_ig = np.random.random((hidden_size, input_size)).astype(np.float32)
U_ig = np.random.random((hidden_size, hidden_size)).astype(np.float32)
b_ig = np.random.random((hidden_size, 1)).astype(np.float32)
W_fg = np.random.random((hidden_size, input_size)).astype(np.float32)
U_fg = np.random.random((hidden_size, hidden_size)).astype(np.float32)
b_fg = np.random.random((hidden_size, 1)).astype(np.float32)
W_og = np.random.random((hidden_size, input_size)).astype(np.float32)
U_og = np.random.random((hidden_size, hidden_size)).astype(np.float32)
b_og = np.random.random((hidden_size, 1)).astype(np.float32)

input = np.random.random((input_size, batch_size)).astype(np.float32)
prev_output = np.random.random((hidden_size, batch_size)).astype(np.float32)
prev_cell = np.random.random((hidden_size, batch_size)).astype(np.float32)

assert np.dot(W_bi, input).shape == prev_output.shape
assert np.dot(U_bi, prev_output).shape == prev_output.shape

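# With the weights above (bi = block input, ig/fg/og = input/forget/output gates),
# one forward step of a standard LSTM cell can be computed as below. This is a
# minimal NumPy sketch of the textbook recurrence, assumed here rather than taken
# from the lstm module itself.
def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

def lstm_step(x, h_prev, c_prev):
    z = np.tanh(np.dot(W_bi, x) + np.dot(U_bi, h_prev) + b_bi)   # block input
    i = sigmoid(np.dot(W_ig, x) + np.dot(U_ig, h_prev) + b_ig)   # input gate
    f = sigmoid(np.dot(W_fg, x) + np.dot(U_fg, h_prev) + b_fg)   # forget gate
    o = sigmoid(np.dot(W_og, x) + np.dot(U_og, h_prev) + b_og)   # output gate
    c_new = f * c_prev + i * z                                   # new cell state
    h_new = o * np.tanh(c_new)                                   # new hidden state
    return h_new, c_new

h_out, c_out = lstm_step(input, prev_output, prev_cell)
assert h_out.shape == prev_output.shape and c_out.shape == prev_cell.shape
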
def lstm_unroll(num_lstm_layer, seq_len, input_size, num_hidden, num_label, dropout=0.):
    cls_weight = mx.sym.Variable("cls_weight")
    cls_bias = mx.sym.Variable("cls_bias")
    param_cells = []
    last_states = []
    for i in range(num_lstm_layer):
        param_cells.append(LSTMParam(i2h_weight=mx.sym.Variable("l%d_i2h_weight" % i),
                                     i2h_bias=mx.sym.Variable("l%d_i2h_bias" % i),
                                     h2h_weight=mx.sym.Variable("l%d_h2h_weight" % i),
                                     h2h_bias=mx.sym.Variable("l%d_h2h_bias" % i)))
        state = LSTMState(c=mx.sym.Variable("l%d_init_c" % i),
                          h=mx.sym.Variable("l%d_init_h" % i))
        last_states.append(state)
    assert len(last_states) == num_lstm_layer

    data = mx.sym.Variable('data')
    label = mx.sym.Variable('softmax_label')
    dataSlice = mx.sym.SliceChannel(data=data, num_outputs=seq_len, squeeze_axis=1)

    hidden_all = []
    for seqidx in range(seq_len):
        hidden = dataSlice[seqidx]
        # stack LSTM
        for i in range(num_lstm_layer):
            if i == 0:
                dp = 0.
            else:
                dp = dropout
            next_state = lstm(num_hidden, indata=hidden, prev_state=last_states[i],
                              param=param_cells[i], seqidx=seqidx, layeridx=i, dropout=dp)
            hidden = next_state.h
            last_states[i] = next_state
        # decoder
        if dropout > 0.:
            hidden = mx.sym.Dropout(data=hidden, p=dropout)
        hidden_all.append(hidden)

    hidden_concat = mx.sym.Concat(*hidden_all, dim=0)
    pred = mx.sym.FullyConnected(data=hidden_concat, num_hidden=num_label,
                                 weight=cls_weight, bias=cls_bias, name='pred')

    ############################################################################
    # Make label the same shape as our produced data path.
    # I did not observe big speed difference between the following two ways.
    label = mx.sym.transpose(data=label)
    label = mx.sym.Reshape(data=label, target_shape=(0,))
    #label_slice = mx.sym.SliceChannel(data=label, num_outputs=seq_len)
    #label = [label_slice[t] for t in range(seq_len)]
    #label = mx.sym.Concat(*label, dim=0)
    #label = mx.sym.Reshape(data=label, target_shape=(0,))
    ############################################################################
    sm = mx.sym.SoftmaxOutput(data=pred, label=label, name='softmax')
    return sm