def train(self):
    # Model
    self.lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons)
    self.lstm_model.load_state_dict(
        torch.load("/Users/baptiste/Desktop/training/AAPL_36.pth"))
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(self.lstm_model.parameters(), lr=self.learning_rate)
    print("Start training")
    for epoch in range(nb_epochs):
        for (x, y) in self.training_dataloader:
            optimizer.zero_grad()
            self.lstm_model.hidden_cell = (
                torch.zeros(1, self.batch_size, self.lstm_model.nb_neurons),
                torch.zeros(1, self.batch_size, self.lstm_model.nb_neurons))
            pred = self.lstm_model(x.float())
            y = y.view(self.batch_size, 1)
            loss = loss_function(pred, y)
            loss.backward()
            optimizer.step()
        print("epoch n°%s : loss = %s" % (epoch, loss.item()))
        self.validate()
        if epoch % 5 == 1:
            model_name = "%s_%s.pth" % (self.stock, epoch)
            torch.save(self.lstm_model.state_dict(),
                       os.path.join(output_path, model_name))
def main():
    model = LSTM(settings.vocab_size, settings.word_embedding_size, settings.hidden_size,
                 settings.num_layers, settings.out_dim, settings.drop_out)
    ''' pre-train word embedding init '''
    dataset = Dataset(args.data)
    model.word_embed.weight = nn.Parameter(torch.from_numpy(dataset.get_wordembedding()))
    if torch.cuda.is_available():
        torch.cuda.manual_seed(settings.seed)
        model.cuda()
    optimizer = optim.SGD(model.parameters(), lr=settings.lr, weight_decay=1e-5)
    criteria = nn.CrossEntropyLoss()
    best_dev_acc = 0.0
    best_test_acc = 0.0
    for i in xrange(dataset.size / settings.batch_size * settings.max_epochs):
        batch_data = dataset.get_batch()
        loss = train(model, batch_data, optimizer, criteria)
        if (i + 1) % settings.validate_freq == 0:
            print "validating..."
            dev_acc = test(model, dataset.dev_data)
            test_acc = test(model, dataset.test_data)
            if dev_acc > best_dev_acc:
                best_dev_acc = dev_acc
                best_test_acc = test_acc
                torch.save(model, os.path.join(args.model_dir, "sa_{}.model".format(best_dev_acc)))
            with open(os.path.join(args.model_dir, "log.txt"), "a") as logger:
                logger.write("epoch: {}, dev acc: {}, test acc: {}, "
                             "batch loss: {}, best dev acc:{}, best test acc:{}\n".format(
                                 i * settings.batch_size / float(dataset.size), dev_acc, test_acc,
                                 loss.cpu().numpy()[0], best_dev_acc, best_test_acc))
            print "epoch: {}, dev acc: {}, test acc: {}, " \
                  "batch loss: {}, best dev acc:{}, best test acc:{}".format(
                      i * settings.batch_size / float(dataset.size), dev_acc, test_acc,
                      loss.cpu().numpy()[0], best_dev_acc, best_test_acc)
def __call__(self, number_of_iterations=2, learning_rate=0.005, embedding_size=300,
             hidden_size=100, batch_size=100):
    print("Starting 'Image Retrieval' in 'LSTM' mode with '" + self.difficulty + "' data")

    self.model_full_path = self.model_path + "/" + self.model_name + "_" + self.timestamp + \
        "_" + str(learning_rate) + "_" + str(embedding_size) + ".pty"
    self.output_file_name = self.output_path + "/" + self.model_name + "_" + self.timestamp + \
        "_" + str(learning_rate) + "_" + str(embedding_size) + ".csv"

    self.number_of_iterations = number_of_iterations
    self.learning_rate = learning_rate
    self.embedding_size = embedding_size
    self.hidden_size = hidden_size
    self.batch_size = batch_size

    self.model = LSTM(self.nwords, self.embedding_size, self.image_feature_size,
                      self.output_vector_size, self.hidden_size, self.batch_size)
    self.criterion = nn.CrossEntropyLoss()
    self.evaluate = Evaluate(self.model, self.img_features, self.minibatch, self.preprocess,
                             self.image_feature_size, self.output_vector_size)
    print(self.model)
    self.optimizer = optim.Adam(self.model.parameters(), lr=self.learning_rate)

    self.train_loss_values = []
    self.magic()
    self.save_model()
    self.save_data()
def apply_morph_only_rnn_gru(self, sentence, sentence_mask, sentence_morph,
                             sentence_morph_mask, use_noise=1):
    """
    sentence : sentence * batch
    sentence_morph : sentence * batch * morph
    1. morph lookup -> dropout
    2. MorphStructRNN
    3. lstm -> dropout
    4. lstm -> maxout -> dropout
    5. logistic
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # morph lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    morph_layer_1st = MorphStructRNN(self.n_emb_morph, self.n_hids, 'gru')
    hiddens = morph_layer_1st.apply(src_morph_emb, src_morph_mask)
    self.layers.append(morph_layer_1st)

    rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2rd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3nd)

    if True:
        maxout = MaxoutLayer()
        src_morph_merge_emb = src_morph_emb.sum(2)
        src_morph_mask = src_morph_mask.max(axis=2)
        # src_morph_merge_emb : sentence * batch * n_emb_morph
        states = T.concatenate([src_morph_merge_emb, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, self.n_emb_morph + self.n_hids, self.n_hids,
                               src_morph_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def evaluateAccuracy(data: list, network: LSTM, seq_length: int, sign_to_int):
    # network.eval()
    hidden = network.initHidden()
    memory = network.initMemory()
    hidden = hidden.to(device)
    memory = memory.to(device)
    print(len(data))
    right = 0
    total = 0
    with torch.no_grad():
        for i in range(0, len(data), seq_length):
            in_seq = convert_to_one_hot_matrix(data[i:i + seq_length], sign_to_int)
            out_seq = target_tensor(data[i + 1:i + seq_length + 1], sign_to_int)
            in_seq = in_seq.to(device)
            out_seq = out_seq.to(device)
            out_seq.unsqueeze_(-1)
            if i % 100000 == 0:
                print(i)
            for j in range(out_seq.size()[0]):
                output, hidden, memory = network(in_seq[j], hidden, memory)
                _, guess = output.max(1)
                if guess == out_seq[j]:
                    right = right + 1
                total = total + 1
    print("finished eval loop")
    print(total)
    print(right)
    res = right / total
    print("finished calculating accuracy")
    print(res)
    return
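# The helper below is NOT from the source; it is a minimal sketch of what
# convert_to_one_hot_matrix is assumed to do in the snippet above: one one-hot row
# per symbol, shaped (seq_len, 1, vocab) so that in_seq[j] can be fed to the network
# one step at a time. Treat the exact shape as an assumption.
import torch

def convert_to_one_hot_matrix(seq, sign_to_int):
    one_hot = torch.zeros(len(seq), 1, len(sign_to_int))
    for idx, sign in enumerate(seq):
        one_hot[idx][0][sign_to_int[sign]] = 1
    return one_hot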
def __init__(self, model_path="model.pt"):
    self.model = LSTM(45, 256, 45)
    self.model.load_state_dict(torch.load(model_path, map_location=device))
    self.sequence_length = 64
    with open("data/dictionary.json", "r") as f:
        self._mappings = json.load(f)
    self.melody = None
def apply_normal(self, sentence, sentence_mask, use_noise=1, use_maxout=True):
    """
    sentence : sentence * batch
    1. word lookup -> dropout
    2. lstm -> dropout
    3. lstm -> maxout -> dropout
    4. logistic
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]

    emb_lstm_range = T.arange(self.n_emb_lstm)
    # word lookup table
    table = DynamicMixLookupTable(self.n_emb_lstm, **self.cfig)
    # table = DynamicLookupTable(self.n_emb_lstm, **self.cfig)
    # table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.src_emb = src_emb
    self.layers.append(table)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    rnn_layer_1st = LSTM(self.n_emb_lstm, self.n_hids)
    hiddens, cells = rnn_layer_1st.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1st)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2rd)

    if use_maxout:
        maxout = MaxoutLayer()
        states = T.concatenate([src_emb, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids,
                               src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)
    # hier_softmax_layer = HierarchicalSoftmax(hiddens, self.n_hids, self.vocab_size)
    # self.layers.append(hier_softmax_layer)
    # self.cost = hier_softmax_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def create_model():
    model = LSTM(input_size=input_size,
                 num_classes=num_classes,
                 hidden=args.hidden_unit,
                 num_layers=args.num_layers,
                 mean_after_fc=args.mean_after_fc,
                 mask_empty_frame=args.mask_empty_frame)
    model.cuda()
    optimizer = torch.optim.Adam(model.parameters(), lr=args.lr)
    return (model, optimizer)
def main():
    print 'Loading Data'
    x = cPickle.load(open('english_matrices.pkl', 'rb'))
    y = cPickle.load(open('chinese_matrices.pkl', 'rb'))
    print 'Done'
    # x = np.random.random((10, 10, 50, 1))
    # y = np.random.random((10, 10, 50, 1))

    encoder_lstm = LSTM(50, 100, 50)
    encoder_lstm.load_weights('encoder.pkl')
    outputs = []
    for i in range(10000):
        outputs.append(encoder_lstm.predict(x[i]))

    # for _ in range(10):
    #     for i in range(20):
    #         idx_start = i*500
    #         idx_end = min((i+1)*500, len(x))
    #         sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
    #         train(encoder_lstm, x[idx_start:idx_end], y[idx_start:idx_end][0], 50, 'encoder')
    #     encoder_lstm.save_weights('encoder.pkl')
    # outputs = encoder_lstm.predict(x[:10000])
    # encoder_lstm.save_weights('encoder.pkl')

    embed()

    decoder_lstm = LSTM(50, 100, 50)
    for _ in range(4):
        for i in range(20):
            idx_start = i * 500
            idx_end = min((i + 1) * 500, len(x))
            sys.stdout.write('\n\nTraining Data %d - %d' % (idx_start, idx_end))
            train(decoder_lstm, outputs[idx_start:idx_end], y[idx_start:idx_end], 50, 'decoder')
        decoder_lstm.save_weights('decoder.pkl')
def main(opt):
    train_dataset = BADataset(opt.dataroot, opt.L, True, False, False)
    train_dataloader = BADataloader(train_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    valid_dataset = BADataset(opt.dataroot, opt.L, False, True, False)
    valid_dataloader = BADataloader(valid_dataset, batch_size=opt.batchSize,
                                    shuffle=True, num_workers=opt.workers, drop_last=True)

    test_dataset = BADataset(opt.dataroot, opt.L, False, False, True)
    test_dataloader = BADataloader(test_dataset, batch_size=opt.batchSize,
                                   shuffle=True, num_workers=opt.workers, drop_last=True)

    all_dataset = BADataset(opt.dataroot, opt.L, False, False, False)
    all_dataloader = BADataloader(all_dataset, batch_size=opt.batchSize,
                                  shuffle=False, num_workers=opt.workers, drop_last=False)

    opt.n_edge_types = train_dataset.n_edge_types
    opt.n_node = train_dataset.n_node
    opt.n_existing_node = all_node_num

    net = LSTM(opt, hidden_state=opt.state_dim * 5)
    net.double()
    print(net)

    criterion = nn.CosineSimilarity(dim=1, eps=1e-6)

    if opt.cuda:
        net.cuda()
        criterion.cuda()

    optimizer = optim.Adam(net.parameters(), lr=opt.lr)
    early_stopping = EarlyStopping(patience=opt.patience, verbose=True)

    os.makedirs(OutputDir, exist_ok=True)
    train_loss_ls = []
    valid_loss_ls = []
    test_loss_ls = []

    for epoch in range(0, opt.niter):
        train_loss = train(epoch, train_dataloader, net, criterion, optimizer, opt)
        valid_loss = valid(valid_dataloader, net, criterion, opt)
        test_loss = test(test_dataloader, net, criterion, opt)

        train_loss_ls.append(train_loss)
        valid_loss_ls.append(valid_loss)
        test_loss_ls.append(test_loss)

        early_stopping(valid_loss, net, OutputDir)
        if early_stopping.early_stop:
            print("Early stopping")
            break

    df = pd.DataFrame({'epoch': [i for i in range(1, len(train_loss_ls) + 1)],
                       'train_loss': train_loss_ls,
                       'valid_loss': valid_loss_ls,
                       'test_loss': test_loss_ls})
    df.to_csv(OutputDir + '/loss.csv', index=False)

    net.load_state_dict(torch.load(OutputDir + '/checkpoint.pt'))
    inference(all_dataloader, net, criterion, opt, OutputDir)
def setup_model(model_name, dataset_name, model_path, device):
    """Sets up a language model (LSTM) on device based on its designated filename."""
    device = torch.device(device)
    batch_size = 20
    data_loader = DataLoader(dataset_name, batch_size, device, 70)
    model = LSTM(vocab_size=len(data_loader.corpus.dictionary), device=device,
                 batch_size=batch_size)
    model_path = os.path.join(model_path, model_name)
    print("loading model state_dict...")
    print("model_path", model_path)
    model.load_state_dict(
        torch.load(model_path, map_location=torch.device(device))['model'])
    return model, data_loader, batch_size
def apply_model(self, sentence, sentence_mask, sentence_morph, sentence_morph_mask, use_noise=1):
    """
    sentence : sentence * batch
    sentence_morph : sentence * batch * morph
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    src_emb = lookup_layer('word', src)
    # src_morph_emb : sentence * batch * morph * n_emb_morph
    # src_morph_emb = lookup_layer('morph', src)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)

    rnn_layer_1rd = LSTM(self.n_hids, self.n_hids)
    # the first LSTM consumes the word embeddings
    # (the original applied it to `hiddens`, which is undefined at this point)
    hiddens, cells = rnn_layer_1rd.apply(src_emb, src_mask)
    self.layers.append(rnn_layer_1rd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2rd)

    if True:
        maxout = MaxoutLayer()
        # src_emb : sentence * batch * n_emb
        # hiddens : sentence * batch * hids
        states = T.concatenate([src_emb, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids,
                               src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def synthesize_midi():
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')
    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    load_network(network, "net.pth")
    notes = synthesize_notes(network, "C3", 100, sign_to_int, int_to_sign)
    create_midi(notes)
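# Minimal sketches (assumptions, not from the source) of the I/O helpers used above:
# loade_data is assumed to read a JSON file, and load_network to restore a saved state_dict.
import json
import torch

def loade_data(path):
    with open(path, "r") as f:
        return json.load(f)

def load_network(network, path):
    network.load_state_dict(torch.load(path))
    network.eval()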
def main():
    np.random.seed(RANDOM_SEED)  # for reproducible results
    data = pd.read_csv(DATA_PATH)
    scaler, data = preprocessing_data.process_normalization(
        data=data,
        useful_colmns=[
            'Open', 'High', 'Low', 'Close', 'Volume', 'Market Cap', 'Weekday', 'Increase'
        ])
    training_set, test_set = generate_batch.split_data(data)
    training_inputs, train_outputs = generate_batch.generate_batchs(training_set)
    test_inputs, test_outputs = generate_batch.generate_batchs(test_set)

    model = LSTM.build_model(inputs=training_inputs, output_size=1, LSTM_units=20)
    history = model.fit(training_inputs,
                        train_outputs,
                        batch_size=1,
                        epochs=30,
                        verbose=2,
                        shuffle=True)

    fig, ax1 = plt.subplots(1, 1)
    ax1.plot(history.epoch, history.history['loss'])
    ax1.set_title('Training loss')
    ax1.set_ylabel('Mean Absolute Error (MAE)', fontsize=12)
    ax1.set_xlabel('# Epochs', fontsize=12)
    plt.show()

    h = model.predict(test_inputs)
    dat = np.concatenate((test_outputs, h), axis=1)
    dat = dat * scaler.data_range_[-1] + scaler.data_min_[-1]
    df = pd.DataFrame(dat, columns=['actual', 'prediction'])
    df.to_csv('prediction.csv', index=False)
def main(cfg):
    if cfg['model'] == 'mlp':
        net = MLP(300, 768, cfg['class_num'])
    elif cfg['model'] == 'cnn':
        net = CNN(300, 768, cfg['class_num'])
    elif cfg['model'] == 'lstm':
        net = LSTM(300, cfg['class_num'], cfg['device'])
    elif cfg['model'] == 'gru':
        net = GRU(300, cfg['class_num'], cfg['device'])
    else:
        # the original referenced an undefined `args.model` here
        raise Exception(f"model {cfg['model']} not available")

    if cfg['device'] == 'cuda':
        if len(cfg['gpu_ids']) == 1:
            torch.cuda.set_device(cfg['gpu_ids'][0])
            net = net.cuda()
        else:
            net = net.cuda()
            net = nn.DataParallel(net, device_ids=cfg['gpu_ids'])
        torch.backends.cudnn.benchmark = True

    if cfg['mode'] == 'train':
        train(cfg, net)
    elif cfg['mode'] == 'predict':
        predict(cfg, net, 'checkpoints/{}.pth'.format(cfg['model']))
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm

    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    emb_lstm_range = T.arange(n_emb_lstm)
    table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
    state_below = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    rnn = LSTM(n_emb_lstm, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    self.layers.append(rnn)

    # if self.dropout < 1.0:
    #     hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    rnn2 = FLSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn2.apply(hiddens, hiddens, src_mask)
    self.layers.append(rnn2)

    # rnn = NormalRNN(n_emb_lstm, self.n_hids)
    # hiddens = rnn.apply(state_below, src_mask)
    # self.layers.append(rnn)

    if True:
        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids,
                               src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def show_result(self):
    files = os.listdir(self.output)
    for file in files:
        if ".pth" in file:
            path = os.path.join(self.output, file)
            lstm_model = LSTM(self.input_size, self.output_size, self.nb_neurons)
            lstm_model.load_state_dict(torch.load(path))
            lstm_model.eval()
            print("model : %s loaded" % path)
            predictions = []
            for (x, _) in self.testing_dataloader:
                if x.shape[0] == self.batch_size:
                    with torch.no_grad():
                        lstm_model.hidden_cell = (
                            torch.zeros(1, self.batch_size, lstm_model.nb_neurons),
                            torch.zeros(1, self.batch_size, lstm_model.nb_neurons))
                        output = lstm_model(x.float())
                        output = self.data.unnormalizeData(output).squeeze()
                        predictions += output.tolist()
            plt.plot(predictions, label="prediction")
            plt.plot(self.real_data_test, label="target")
            plt.title(file)
            plt.legend()
            plt.show()
def train_model(stock, col):
    data = pd.read_csv(
        f'../data/ashare/{stock}.csv',
        encoding='gbk',
        converters={0: lambda x: datetime.strptime(x, '%Y-%m-%d')})
    data = data.sort_index(ascending=False)
    training_set = data.iloc[:, col].values

    sc = MinMaxScaler()
    training_data = sc.fit_transform(training_set.reshape(-1, 1))
    # print(training_data)

    num_classes = 2
    seq_length = 8
    x, y = sliding_windows(training_data, seq_length, num_classes)
    print(x.shape)
    print(y.shape)

    train_size = int(len(y) * 0.67)
    test_size = len(y) - train_size

    trainX = Variable(torch.Tensor(np.array(x[0:train_size])))
    trainY = Variable(torch.Tensor(np.array(y[0:train_size])))
    # print(trainX)
    # print(trainY)
    testX = Variable(torch.Tensor(np.array(x[train_size:len(x)])))
    testY = Variable(torch.Tensor(np.array(y[train_size:len(y)])))

    num_epochs = 1500
    learning_rate = 0.01
    input_size = 1
    hidden_size = 2
    num_layers = 1

    lstm = LSTM(num_classes, input_size, hidden_size, num_layers, seq_length)
    train(lstm, num_epochs, num_classes, trainX, trainY, learning_rate)
    torch.save(lstm.state_dict(), f'../data/ashare/models/{stock}-col{col}-8-2.pt')
def synthesize_notes(network: LSTM, inputs, n: int, sign_to_int: dict, int_to_sign: dict):
    seq = []
    hidden = network.initHidden()
    memory = network.initMemory()
    inputs = convert_to_one_hot_matrix([inputs] * 2, sign_to_int)
    with torch.no_grad():
        for i in range(n):
            p, hidden, memory = network(inputs, hidden, memory)
            p = p.numpy()[0][0][:]
            ind = np.random.choice((p.shape[0]), 1, p=p / sum(p))[0]
            inputs = torch.zeros(1, 1, len(sign_to_int))
            inputs[0][0][ind] = 1
            seq.append(int_to_sign[str(ind)])
    return seq
def main(): parser = argparse.ArgumentParser() parser.add_argument("--gpu-device", "-g", type=int, default=0) parser.add_argument("--dropout-embedding-softmax", "-dos", type=float, default=0.5) parser.add_argument("--dropout-rnn", "-dor", type=float, default=0.2) parser.add_argument("--ndim-hidden", "-dh", type=int, default=640) parser.add_argument("--num-layers", "-nl", type=int, default=2) parser.add_argument("--num-to-generate", "-n", type=int, default=100) parser.add_argument("--model-filename", "-m", type=str, default="model.hdf5") parser.add_argument("--vocab-filename", "-v", type=str, default="vocab.pkl") args = parser.parse_args() assert args.num_layers > 0 assert args.ndim_hidden > 0 assert os.path.isfile(args.vocab_filename) is True with open(args.vocab_filename, "rb") as f: vocab_str_id = pickle.load(f) vocab_id_str = pickle.load(f) vocab_size = len(vocab_str_id) lstm = LSTM(vocab_size=vocab_size, ndim_hidden=args.ndim_hidden, num_layers=args.num_layers, dropout_embedding_softmax=args.dropout_embedding_softmax, dropout_rnn=args.dropout_rnn) assert lstm.load(args.model_filename) for n in range(args.num_to_generate): lstm.reset_state() x_sequence = np.asarray([ID_EOS]).astype(np.int32)[None, :] for t in range(1000): distribution = functions.softmax(lstm(x_sequence[:, t])).data[0] y_data = np.random.choice(np.arange(distribution.size), size=1, p=distribution).astype(np.int32) x_sequence = np.concatenate((x_sequence, y_data[None, :]), axis=1) if y_data[0] == ID_EOS: break tokens = [] for t in range(1, x_sequence.size - 2): tokens.append(vocab_id_str[x_sequence[0, t]]) print(" ".join(tokens))
def validate(): stock = "MC.PA" directory = "/Users/baptiste/Desktop/training" input_size = 4 output_size = 4 nb_neurons = 200 test_split = 0.1 time_window = 5 dataloader = Data(stock) df = dataloader.getData() real_data = df.to_numpy() df_normalized = dataloader.normalizeData(df) df_normalized = torch.FloatTensor(df_normalized.to_numpy()) test_split = int(test_split * df.shape[0]) real_test_split = real_data[-test_split:-time_window:, 3] testing_split = df_normalized[-test_split:, :] files = os.listdir(directory) for file in files: if ".pth" in file: path = os.path.join(directory, file) lstm_model = LSTM(input_size, output_size, nb_neurons) lstm_model.load_state_dict(torch.load(path)) print("model : %s loaded" % path) lstm_model.eval() predictions = [] for i in range(testing_split.shape[0] - time_window): x_test = testing_split[i:i + time_window] with torch.no_grad(): lstm_model.hidden_cell = (torch.zeros( 1, 1, lstm_model.nb_neurons), torch.zeros( 1, 1, lstm_model.nb_neurons)) predictions.append( dataloader.unnormalizeData( lstm_model(x_test).tolist())) predictions = np.array(predictions)[:, 3, 0] #plt.figure(15,10) plt.plot(real_test_split, label="target") plt.plot(predictions, label="prediction") plt.title(file) plt.legend() plt.show()
def train():
    train_writer = SummaryWriter(os.path.join(LOG_DIR, 'train7-64-LSTM-Doppler'))
    test_writer = SummaryWriter(os.path.join(LOG_DIR, 'test7-64-LSTM-Doppler'))
    train_loader, test_loader = load_data(TRAIN_DIR, TEST_DIR)

    lstm = LSTM().to(DEVICE)
    optimizer = torch.optim.Adam(lstm.parameters(), lr=LR)
    loss_func = nn.CrossEntropyLoss().to(DEVICE)

    for epoch in range(MAX_EPOCH):
        log_string('**** EPOCH %3d ****' % (epoch))
        sys.stdout.flush()
        train_one_epoch(epoch, train_writer, train_loader, lstm, loss_func, optimizer)
        eval_one_epoch(epoch, test_writer, test_loader, lstm, loss_func)

    # save model parameters to files
    torch.save(lstm.state_dict(), MODEL_DIR)
def main():
    names_str = read_csv(filname='data/names/names.csv')
    all_char_str = set([char for name in names_str for char in name])
    char2idx = {char: i for i, char in enumerate(all_char_str)}
    char2idx['EOS'] = len(char2idx)

    # save char dictionary
    cPickle.dump(char2idx, open("dic.p", "wb"))

    names_idx = [[char2idx[char_str] for char_str in name_str] for name_str in names_str]

    # build model
    model = LSTM(input_dim=len(char2idx), embed_dim=100, hidden_dim=128)
    criterion = nn.NLLLoss()
    optimizer = optim.Adam(model.parameters())

    n_iters = 5
    for iter in range(1, n_iters + 1):
        # data shuffle
        random.shuffle(names_idx)
        total_loss = 0
        for i, name_idx in enumerate(names_idx):
            input = inputTensor(name_idx)
            target = targetTensor(name_idx, char2idx)
            loss = train(model, criterion, input, target)
            total_loss += loss
            optimizer.step()
        print(iter, "/", n_iters)
        print("loss {:.4}".format(float(total_loss / len(names_idx))))

    # save trained model
    torch.save(model.state_dict(), "model.pt")
def model_fn(model_dir):
    """Load the PyTorch model from the `model_dir` directory."""
    print("Loading model.")

    # First, load the parameters used to create the model.
    model_info = {}
    model_info_path = os.path.join(model_dir, 'model_info.pth')
    with open(model_info_path, 'rb') as f:
        model_info = torch.load(f)
    print("model_info: {}".format(model_info))

    # Determine the device and construct the model.
    device = torch.device("cpu" if torch.cuda.is_available() else "cpu")
    # model = LSTM(model_info['embedding_dim'], model_info['hidden_dim'], model_info['vocab_size'])
    model = LSTM(model_info['num_classes'], model_info['input_size'],
                 model_info['hidden_size'], model_info['num_layers'])

    # Load the stored model parameters.
    model_path = os.path.join(model_dir, 'model.pth')
    with open(model_path, 'rb') as f:
        model.load_state_dict(torch.load(f))

    model.to(device).eval()
    print("Done loading model.")
    return model
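# A hedged sketch (assumption, not from the source) of how the matching training script
# could write model_info.pth so that model_fn above can rebuild the LSTM. The hyperparameter
# values and the `model_dir` variable are illustrative only.
model_info = {'num_classes': 1, 'input_size': 1, 'hidden_size': 64, 'num_layers': 2}
with open(os.path.join(model_dir, 'model_info.pth'), 'wb') as f:
    torch.save(model_info, f)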
def main(opt):
    model = LSTM(opt, batch_first=True, dropout=opt.dropout)
    if opt.pre_train:
        model.load_state_dict(torch.load(opt.save_path))
    optimizer = optim.Adam(model.parameters(), opt.learning_rate)
    mseloss = nn.MSELoss()

    dataset = PowerDataset(opt,
                           prepocess_path=opt.prepocess_path,
                           transform=transforms.Compose([transforms.ToTensor()]))
    train_dataset = data.Subset(dataset, indices=range(8664))
    test_dataset = data.Subset(dataset, indices=range(8664, len(dataset)))
    train_dataloader = data.dataloader.DataLoader(train_dataset,
                                                  num_workers=opt.n_threads,
                                                  batch_size=opt.batch_size,
                                                  shuffle=True)
    test_sampler = data.SequentialSampler(test_dataset)
    test_dataloader = data.dataloader.DataLoader(test_dataset,
                                                 num_workers=opt.n_threads,
                                                 batch_size=opt.test_batch_size,
                                                 shuffle=False,
                                                 sampler=test_sampler)

    for e in range(opt.epochs):
        if opt.test_only:
            test(model, test_dataloader)
            break
        print('epoch: ', e)
        train(model, mseloss, optimizer, train_dataloader)
        test(model, test_dataloader)
        torch.save(model.state_dict(), opt.save_path)
def infer(minmax, data_train, data_test):
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    # lstm_train_model = LSTM()
    model = LSTM().to(device)
    model.load_state_dict(
        torch.load(r"D:\stock\weights\checkpont_67.27376310428824.pth"))
    model.eval()

    test_size = len(data_test)
    future_day = test_size
    timestamp = 5

    output_predict = np.zeros((data_train.shape[0] + future_day, data_train.shape[1]))
    output_predict[0] = data_train.iloc[0]

    for k in range(0, (data_train.shape[0] // timestamp) * timestamp, timestamp):
        index = min(k + timestamp, output_predict.shape[0] - 1)
        # the original indexed an undefined `df` here; data_train is the frame being iterated
        batch_x = np.expand_dims(data_train.iloc[k:index, :].values, axis=0)
        batch_y = data_train.iloc[k + 1:index + 1, :].values
        batch_x = torch.Tensor(batch_x).to(device)
        batch_y = torch.Tensor(batch_y).to(device)
        out_logits = model(batch_x)
        # init_value = last_state
        output_predict[k + 1:k + timestamp + 1] = out_logits.cpu().detach().numpy()[0]

    output_predict = minmax.inverse_transform(output_predict)
    return output_predict
def train(config):
    train_data = pickle.load(open(os.path.join(config.data_path, config.train_name), "rb"))
    dev_data = pickle.load(open(os.path.join(config.data_path, config.dev_name), "rb"))
    test_data = pickle.load(open(os.path.join(config.data_path, config.test_name), "rb"))
    vocabulary = pickle.load(open(os.path.join(config.data_path, config.vocabulary_name), "rb"))
    # load w2v data
    weight = pickle.load(open(os.path.join(config.data_path, config.weight_name), "rb"))

    if config.task_name == "lstm":
        text_model = LSTM(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "lstm_maxpool":
        text_model = LSTM_maxpool(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                                  output_dim=config.class_num, hidden_dim=config.hidden_dim,
                                  num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "rnn":
        text_model = RNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         output_dim=config.class_num, hidden_dim=config.hidden_dim,
                         num_layers=config.num_layers, dropout=config.dropout)
    elif config.task_name == "cnn":
        text_model = CNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                         class_num=config.class_num, kernel_num=config.kernel_num,
                         kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                         static=config.static, in_channels=config.in_channels)
    elif config.task_name == "cnn_w2v":
        text_model = CNN_w2v(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                             class_num=config.class_num, kernel_num=config.kernel_num,
                             kernel_sizes=config.kernel_sizes, dropout=config.dropout,
                             static=config.static, in_channels=config.in_channels,
                             weight=weight)
    elif config.task_name == "rcnn":
        text_model = RCNN(vocab_size=len(vocabulary), embed_dim=config.embed_dim,
                          output_dim=config.class_num, hidden_dim=config.hidden_dim,
                          num_layers=config.num_layers, dropout=config.dropout)

    optimizer = Adam(lr=config.lr, weight_decay=config.weight_decay)
    timing = TimingCallback()
    early_stop = EarlyStopCallback(config.patience)
    accuracy = AccuracyMetric(pred='output', target='target')

    trainer = Trainer(train_data=train_data, model=text_model, loss=CrossEntropyLoss(),
                      batch_size=config.batch_size, check_code_level=0,
                      metrics=accuracy, n_epochs=config.epoch,
                      dev_data=dev_data, save_path=config.save_path,
                      print_every=config.print_every, validate_every=config.validate_every,
                      optimizer=optimizer, use_tqdm=False,
                      device=config.device, callbacks=[timing, early_stop])
    trainer.train()

    # test result
    tester = Tester(test_data, text_model, metrics=accuracy)
    tester.test()
def get_model(input_size, embed_size, output_size, model_type, dropout=DROPOUT):
    if model_type.lower() == 'lstm':
        return LSTM(input_size, embed_size, output_size, dropout)
    if model_type.lower() == 'cnn':
        return CNN(input_size, embed_size, output_size, dropout)
    if model_type.lower() == 'gru':
        return GRU(input_size, embed_size, output_size, dropout)
    else:
        return None
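# Hypothetical usage of the factory above; `vocab_size`, `num_classes` and the sizes
# are illustrative only and not taken from the source.
model = get_model(input_size=vocab_size, embed_size=128, output_size=num_classes, model_type='lstm')
if model is None:
    raise ValueError('unsupported model_type')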
def train(network: LSTM, criterion, input_seq, follow_seq, optimizer: optim.Optimizer, scheduler):
    follow_seq.unsqueeze_(-1)
    hidden = network.initHidden()
    memory = network.initMemory()
    hidden = hidden.to(device)
    memory = memory.to(device)
    loss = 0
    network.zero_grad()
    for i in range(input_seq.size()[0]):
        output, hidden, memory = network(input_seq[i], hidden, memory)
        l = criterion(output, follow_seq[i])
        # TODO: smooth loss here
        loss += l
    loss.backward()
    optimizer.step()
    scheduler.step()
    return output, loss.item() / input_seq.size()[0]
def main():
    global args, best_prec1
    best_prec1 = 1e6

    args = parser.parse_args()
    args.original_lr = 1e-6
    args.lr = 1e-6
    args.momentum = 0.95
    args.decay = 5 * 1e-4
    args.start_epoch = 0
    args.epochs = 5000
    args.steps = [-1, 1, 100, 150]
    args.scales = [1, 1, 1, 1]
    args.workers = 4
    args.seed = time.time()
    args.print_freq = 30
    args.feature_size = 100
    args.lSeq = 5

    wandb.config.update(args)
    wandb.run.name = f"Default_{wandb.run.name}" if (args.task == wandb.run.name) else f"{args.task}_{wandb.run.name}"

    conf = configparser.ConfigParser()
    conf.read(args.config)
    # print(conf)
    TRAIN_DIR = conf.get("lstm", "train")
    VALID_DIR = conf.get("lstm", "valid")
    TEST_DIR = conf.get("lstm", "test")
    LOG_DIR = conf.get("lstm", "log")
    create_dir_not_exist(LOG_DIR)

    # TODO: train_list to train_file
    train_list = [os.path.join(TRAIN_DIR, item) for item in os.listdir(TRAIN_DIR)]
    val_list = [os.path.join(VALID_DIR, item) for item in os.listdir(VALID_DIR)]

    os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu
    torch.cuda.manual_seed(int(args.seed))

    model = LSTM(args.feature_size, args.feature_size, args.feature_size)
    model = model.cuda()
    criterion = nn.MSELoss().cuda()
    optimizer = torch.optim.Adam(model.parameters(), args.lr,
                                 betas=(0.9, 0.999), eps=1e-08, weight_decay=args.decay)
    model = DataParallel_withLoss(model, criterion)

    for epoch in range(args.start_epoch, args.epochs):
        adjust_learning_rate(optimizer, epoch)
        train(train_list, model, criterion, optimizer, epoch)
        prec1 = validate(val_list, model, criterion, epoch)

        with open(os.path.join(LOG_DIR, args.task + ".txt"), "a") as f:
            f.write("epoch " + str(epoch) + " MSELoss: " + str(float(prec1)))
            f.write("\n")
        wandb.save(os.path.join(LOG_DIR, args.task + ".txt"))

        is_best = prec1 < best_prec1
        best_prec1 = min(prec1, best_prec1)
        print(' * best MSELoss {MSELoss:.3f} '.format(MSELoss=best_prec1))
        save_checkpoint({
            'epoch': epoch + 1,
            'arch': args.pre,
            'state_dict': model.state_dict(),
            'best_prec1': best_prec1,
            'optimizer': optimizer.state_dict(),
        }, is_best, args.task, epoch=epoch, path=os.path.join(LOG_DIR, args.task))
def build_model(self, output_graph=True):
    self.lstm_0 = LSTM(seq_length=self.FLAGS.seq_length,
                       hidden_size=self.FLAGS.hidden_size,
                       lr=self.FLAGS.lstm_lr,
                       activation=tf.nn.tanh,
                       forget_bias=0.0,
                       name="forget_bias_0.0")
    self.lstm_1 = LSTM(seq_length=self.FLAGS.seq_length,
                       hidden_size=self.FLAGS.hidden_size,
                       lr=self.FLAGS.lstm_lr,
                       activation=tf.nn.tanh,
                       forget_bias=1.0,
                       name="forget_bias_1.0")
    self.lstm_5 = LSTM(seq_length=self.FLAGS.seq_length,
                       hidden_size=self.FLAGS.hidden_size,
                       lr=self.FLAGS.lstm_lr,
                       activation=tf.nn.tanh,
                       forget_bias=5.0,
                       name="forget_bias_5.0")
    self.lstm_10 = LSTM(seq_length=self.FLAGS.seq_length,
                        hidden_size=self.FLAGS.hidden_size,
                        lr=self.FLAGS.lstm_lr,
                        activation=tf.nn.tanh,
                        forget_bias=10.0,
                        name="forget_bias_10.0")

    self.models[self.lstm_0.name] = self.lstm_0
    self.models[self.lstm_1.name] = self.lstm_1
    self.models[self.lstm_5.name] = self.lstm_5
    self.models[self.lstm_10.name] = self.lstm_10

    self.sess.run(tf.global_variables_initializer())
    # self.sess.run([self.irnn.w_assign, self.irnn.b_assign])

    if output_graph:
        tf.summary.FileWriter("logs/", self.sess.graph)
def train_predict(state):
    if state == 'train':
        model = LSTM(len(vocab), batch_size, num_steps=num_steps, lr=learning_rate)
        init = tf.global_variables_initializer()
        saver = tf.train.Saver(max_to_keep=100)
        with tf.Session() as sess:
            sess.run(init)
            for ep in range(1):
                new_state = sess.run(model.init_state)  # reinitialize the state at the start of each epoch
                # print(np.shape(new_state))
                counter = 0
                for x, y in get_batches(encode, batch_size, num_steps):
                    counter += 1
                    start = time.time()
                    feed = {
                        model.inputs: x,
                        model.label: y,
                        model.keep_prob: keep_prob,
                        model.init_state: new_state
                    }
                    batch_loss, new_state, _ = sess.run(
                        [model.loss, model.final_state, model.optimizer],
                        feed_dict=feed)
                    end = time.time()
                    print('Epoch : {} / {}...'.format(ep + 1, epoches),
                          ' Training steps :{}'.format(counter),
                          ' Training loss : {:.4f}'.format(batch_loss),
                          ' {:.4f} sec/batch'.format(end - start))
                    if counter % save_freq == 0:
                        saver.save(sess, 'checkpoints/iter{}.ckpt'.format(counter))
                        print('\n--------save ok!---------\n')
            saver.save(sess, 'checkpoints/end_iter.ckpt')
    elif state == 'predict':
        # seed text: "Legend has it that between heaven and earth there is a spirit stone"
        word = '传说在天地间有一块灵石'
        novel = generate_novel(word, 1000)
        print(''.join(novel))
    else:
        print('Invalid state, please check the arguments...')
def main():
    global device
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    notes = loade_data('./notes.json')['notes']
    validation = loade_data('./validation.json')['notes']
    test = loade_data('./test.json')['notes']
    int_to_sign = loade_data('./int2sign.json')
    sign_to_int = loade_data('./sign2int.json')

    seq_length = 100
    # refactor this, we only need a one-hot for the input
    # select a sequence or whatever here, use predefined for now (testing)
    learning_rate = 0.001

    network = LSTM(hidden_size=64, input_size=90, output_size=90)
    criterion = nn.CrossEntropyLoss()
    network.to(device)
    optimizer = optim.Adam(network.parameters(), learning_rate)
    scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.0001, max_lr=0.001,
                                            cycle_momentum=False)

    # move network to GPU
    print(device)
    # network, _, losses, best_net = trainLoop(network, criterion, notes, optimizer, 3, seq_length, sign_to_int, scheduler)
    best_net = network
    """plt.plot(losses)
    plt.savefig('losses.png')
    plt.close('all')"""
    print('saving network....')
    # save_network(best_net, "net.pth")
    print('evaluating on test data...')
    evaluateAccuracy(test, best_net, seq_length, sign_to_int)
    print("eval done!")
def train():
    int_to_vocab, vocab_to_int, n_vocab, in_text = get_data_from_file(
        flags.batch_size, flags.seq_size)
    x_batch, y_batch = create_batch(in_text, flags.batch_size, flags.seq_size)
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model = LSTM(n_vocab, flags.seq_size, flags.embedding_size, flags.lstm_size).to(device)
    # optimizer = torch.optim.SGD(model.parameters(), lr=0.1, momentum=0.7)
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)
    loss_function = nn.CrossEntropyLoss()

    for e in range(flags.num_epochs):
        print(f'epoch #{e}: ', end="")
        batches = get_batches(x_batch, y_batch, flags.batch_size, flags.seq_size)
        (state_h_1, state_c_1), (state_h_2, state_c_2) = model.zero_state(flags.batch_size)
        state_h_1 = state_h_1.to(device)
        state_c_1 = state_c_1.to(device)
        state_h_2 = state_h_2.to(device)
        state_c_2 = state_c_2.to(device)

        for i, (x, y) in enumerate(batches):
            model.train()
            optimizer.zero_grad()
            x = torch.tensor(x, dtype=torch.int64).to(device)
            # print("x shape {} ".format(np.shape(x)))

            # convert targets to one-hot rows
            # (note: nn.CrossEntropyLoss normally expects class indices rather than one-hot targets)
            tmp = []
            for index, el in enumerate(y):
                tmp.append(np.zeros(n_vocab))
                tmp[index][y[index]] = 1
            # print(y)
            y = tmp
            y = torch.tensor(y, dtype=torch.int64).to(device)

            logits, (state_h_1, state_c_1), (state_h_2, state_c_2) = model(
                x, (state_h_1, state_c_1), (state_h_2, state_c_2))
            # print("logits shape {} , y shape {}".format(np.shape(logits), np.shape(y)))
            loss = loss_function(logits, y)

            state_h_1 = state_h_1.detach()
            state_c_1 = state_c_1.detach()
            state_h_2 = state_h_2.detach()
            state_c_2 = state_c_2.detach()

            loss_value = loss.item()
            loss.backward()
            _ = torch.nn.utils.clip_grad_norm_(model.parameters(), flags.gradients_norm)
            optimizer.step()
            print(f'batch #{i}:\tloss={loss.item():.10f}')
    return model
def inference(args, cnn_features):
    tf.reset_default_graph()
    with tf.Session() as sess:
        net = LSTM(sess, max_seq_len=25, h_dim=1024)
        net.build_model()
        net.inference(cnn_features=cnn_features,
                      label_file=args.label_file,
                      gen_from=args.gen_from,
                      out_path=args.output_folder,
                      bsize=args.bsize)
def main(): parser = argparse.ArgumentParser() parser.add_argument("--batchsize", "-b", type=int, default=64) parser.add_argument("--seq-length", "-l", type=int, default=35) parser.add_argument("--total-epochs", "-e", type=int, default=300) parser.add_argument("--gpu-device", "-g", type=int, default=0) parser.add_argument("--grad-clip", "-gc", type=float, default=5) parser.add_argument("--learning-rate", "-lr", type=float, default=1) parser.add_argument("--weight-decay", "-wd", type=float, default=0.000001) parser.add_argument("--dropout-embedding-softmax", "-dos", type=float, default=0.5) parser.add_argument("--dropout-rnn", "-dor", type=float, default=0.2) parser.add_argument("--momentum", "-mo", type=float, default=0.9) parser.add_argument("--optimizer", "-opt", type=str, default="msgd") parser.add_argument("--ndim-hidden", "-dh", type=int, default=640) parser.add_argument("--num-layers", "-nl", type=int, default=2) parser.add_argument("--lr-decay-epoch", "-lrd", type=int, default=20) parser.add_argument("--model-filename", "-m", type=str, default="model.hdf5") parser.add_argument("--vocab-filename", "-v", type=str, default="vocab.pkl") parser.add_argument("--train-filename", "-train", default=None) parser.add_argument("--dev-filename", "-dev", default=None) parser.add_argument("--test-filename", "-test", default=None) args = parser.parse_args() assert args.num_layers > 0 assert args.ndim_hidden > 0 dataset_train, dataset_dev, dataset_test, vocab_str_id, vocab_id_str = read_data(args.train_filename, args.dev_filename, args.test_filename) dataset_dev = np.asarray(dataset_dev, dtype=np.int32) dataset_test = np.asarray(dataset_test, dtype=np.int32) assert len(dataset_train) > 0 if os.path.isfile(args.vocab_filename): with open(args.vocab_filename, "rb") as f: vocab_str_id = pickle.load(f) vocab_id_str = pickle.load(f) else: with open(args.vocab_filename, "wb") as f: pickle.dump(vocab_str_id, f) pickle.dump(vocab_id_str, f) print("#train = {}".format(len(dataset_train))) print("#dev = {}".format(len(dataset_dev))) print("#test = {}".format(len(dataset_test))) vocab_size = len(vocab_str_id) lstm = LSTM(vocab_size=vocab_size, ndim_hidden=args.ndim_hidden, num_layers=args.num_layers, dropout_embedding_softmax=args.dropout_embedding_softmax, dropout_rnn=args.dropout_rnn) lstm.load(args.model_filename) total_iterations_train = len(dataset_train) // (args.seq_length * args.batchsize) optimizer = Optimizer(args.optimizer, args.learning_rate, args.momentum) optimizer.setup(lstm.model) if args.grad_clip > 0: optimizer.add_hook(chainer.optimizer.GradientClipping(args.grad_clip)) if args.weight_decay > 0: optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay)) using_gpu = False if args.gpu_device >= 0: cuda.get_device(args.gpu_device).use() lstm.model.to_gpu() using_gpu = True xp = lstm.model.xp training_start_time = time.time() for epoch in range(args.total_epochs): sum_loss = 0 epoch_start_time = time.time() # training for itr in range(total_iterations_train): # sample minbatch batch_offsets = np.random.randint(0, len(dataset_train) - args.seq_length - 1, size=args.batchsize) x_batch = np.empty((args.batchsize, args.seq_length), dtype=np.int32) t_batch = np.empty((args.batchsize, args.seq_length), dtype=np.int32) for batch_index, offset in enumerate(batch_offsets): sequence = dataset_train[offset:offset + args.seq_length] teacher = dataset_train[offset + 1:offset + args.seq_length + 1] x_batch[batch_index] = sequence t_batch[batch_index] = teacher if using_gpu: x_batch = 
cuda.to_gpu(x_batch) t_batch = cuda.to_gpu(t_batch) # update model parameters with chainer.using_config("train", True): lstm.reset_state() loss = 0 for t in range(args.seq_length): x_data = x_batch[:, t] t_data = t_batch[:, t] y_data = lstm(x_data) loss += functions.softmax_cross_entropy(y_data, t_data) lstm.model.cleargrads() loss.backward() optimizer.update() sum_loss += float(loss.data) assert sum_loss == sum_loss, "Encountered NaN!" printr("Training ... {:3.0f}% ({}/{})".format((itr + 1) / total_iterations_train * 100, itr + 1, total_iterations_train)) lstm.save(args.model_filename) # evaluation perplexity = -1 negative_log_likelihood = 0 if epoch % 10 == 0: x_sequence = dataset_dev[:-1] t_sequence = dataset_dev[1:] seq_length_dev = len(x_sequence) if using_gpu: x_sequence = cuda.to_gpu(x_sequence)[None, :] t_sequence = cuda.to_gpu(t_sequence)[None, :] with chainer.no_backprop_mode() and chainer.using_config("train", False): lstm.reset_state() for t in range(seq_length_dev): x_data = x_sequence[:, t] t_data = t_sequence[:, t] y_data = lstm(x_data) negative_log_likelihood += float(functions.softmax_cross_entropy(y_data, t_data).data) printr("Computing perplexity ...{:3.0f}% ({}/{})".format((t + 1) / seq_length_dev * 100, t + 1, seq_length_dev)) assert negative_log_likelihood == negative_log_likelihood, "Encountered NaN!" perplexity = math.exp(negative_log_likelihood / len(dataset_dev)) clear_console() print("Epoch {} done in {} sec - loss: {:.6f} - log_likelihood: {} - ppl: {} - lr: {:.3g} - total {} min".format( epoch + 1, int(time.time() - epoch_start_time), sum_loss / total_iterations_train, int(-negative_log_likelihood), int(perplexity), optimizer.get_learning_rate(), int((time.time() - training_start_time) // 60))) if epoch >= args.lr_decay_epoch: optimizer.decrease_learning_rate(0.98, final_value=1e-5)
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share

    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    if False:  # (share only part of embedding)
        n_emb_all = n_emb_lstm + n_emb_struct - n_emb_share
        emb_all_range = T.arange(n_emb_all)
        emb_lstm_range = T.arange(n_emb_lstm)
        emb_struct_range = T.arange(n_emb_lstm - n_emb_share, n_emb_all)
        table = lookup_table(n_emb_all, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_all_range)
        state_below_lstm = table.apply(src, emb_lstm_range)
        state_below_struct = table.apply(src, emb_struct_range)
        self.layers.append(table)

        rnn = SLSTM(n_emb_lstm, n_emb_struct, n_emb_share, self.n_hids,
                    self.n_shids, self.n_structs)
        # rnn = LSTM(self.n_in, self.n_hids)
        hiddens = rnn.merge_out(state_below, state_below_lstm, state_below_struct, src_mask)
        self.layers.append(rnn)
    elif True:
        # use rnn_pyramid
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = rnn_pyramid_layer(n_emb_lstm, self.n_hids)
        hiddens, cells, structs = rnn.apply(state_below, src_mask)
        self.layers.append(rnn)
        self.structs = structs
    else:
        # share all embedding
        emb_lstm_range = T.arange(n_emb_lstm)
        table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
        state_below = table.apply(src, emb_lstm_range)
        self.layers.append(table)

        if self.dropout < 1.0:
            state_below = dropout_layer(state_below, use_noise, self.dropout)

        rnn = LSTM(n_emb_lstm, self.n_hids)
        hiddens, cells = rnn.apply(state_below, src_mask)
        # hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn)

        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)

        rnn1 = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn1.apply(hiddens, src_mask)
        # hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn1)

        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids, src_mask)
        self.layers.append(maxout)

    # rnng = LSTM(n_emb_lstm, self.n_hids)
    # hiddens, cells = rnn.apply(state_below, src_mask)
    # hiddensg = rnng.merge_out(state_below, src_mask)
    # self.layers.append(rnng)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    # chunk = chunk_layer(n_lstm_in + n_lstm_out, n_lstm_out, n_chunk_out, 6)
    n_emb_hid = n_emb_lstm + self.n_hids
    emb_hid = T.concatenate([state_below, hiddens], axis=2)
    # chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    # hiddens = chunk.merge_out(hiddens, hiddens, src_mask, merge_how="for_struct",
    #                           state_below_other=state_below, n_other=n_emb_lstm)
    chunk = chunk_layer(n_emb_hid, self.n_hids, self.n_hids, self.n_structs)
    hiddens = chunk.merge_out(emb_hid, hiddens, src_mask, merge_how="for_struct",
                              state_below_other=None, n_other=0)
    # chunk = chunk_layer(self.n_hids, self.n_hids, self.n_hids, self.n_structs)
    # hiddens = chunk.merge_out(hiddens, hiddensg, src_mask, merge_how="both",
    #                           state_below_other=state_below, n_other=n_emb_lstm)
    self.layers.append(chunk)

    # apply dropout
    if self.dropout < 1.0:
        # dropout is applied to the output of maxout in ghog
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply(self, sentence, sentence_mask, use_noise=1):
    n_emb_lstm = self.n_emb_lstm
    n_emb_struct = self.n_emb_struct
    n_emb_share = self.n_emb_share

    src = sentence[:-1]
    src_mask = sentence_mask[:-1]
    tgt = sentence[1:]
    tgt_mask = sentence_mask[1:]

    emb_lstm_range = T.arange(n_emb_lstm)
    table = lookup_table(n_emb_lstm, self.vocab_size, name='Wemb')
    state_below = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    if self.dropout < 1.0:
        state_below = dropout_layer(state_below, use_noise, self.dropout)

    rnn = LSTM(n_emb_lstm, self.n_hids)
    hiddens, cells = rnn.apply(state_below, src_mask)
    # hiddens = rnn.merge_out(state_below, src_mask)
    self.layers.append(rnn)

    if True:
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)
        rnn1 = LSTM(self.n_hids, self.n_hids)
        hiddens, cells = rnn1.apply(hiddens, src_mask)
        # hiddens = rnn.merge_out(state_below, src_mask)
        self.layers.append(rnn1)

    if True:
        if self.dropout < 1.0:
            hiddens = dropout_layer(hiddens, use_noise, self.dropout)
        rnnp = rnn_pyramid_layer(self.n_hids, n_emb_lstm, self.n_hids)
        hiddens, cells, structs, pyramid = rnnp.apply(hiddens, state_below, src_mask)
        self.layers.append(rnnp)
        # self.structs = structs
        self.rnn_len = rnnp.n_steps
        self.sent_len = sentence.shape[0]

    if True:
        maxout = maxout_layer()
        states = T.concatenate([state_below, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, n_emb_lstm + self.n_hids, self.n_hids,
                               src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = dropout_layer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)
def apply_morph_attention(self, sentence, sentence_mask, sentence_morph,
                          sentence_morph_mask, use_noise=1):
    """
    sentence : sentence * batch
    sentence_morph : sentence * batch * morph
    src_morph_emb : sentence * batch * morph * n_emb_morph
    1. word morph lookup -> dropout -> attention
    2. lstm -> dropout
    3. lstm -> maxout -> dropout
    4. logistic
    """
    src, src_mask = sentence[:-1], sentence_mask[:-1]
    tgt, tgt_mask = sentence[1:], sentence_mask[1:]
    src_morph, src_morph_mask = sentence_morph[:-1], sentence_morph_mask[:-1]

    # word lookup table
    emb_lstm_range = T.arange(self.n_emb_lstm)
    table = LookupTable(self.n_emb_lstm, self.vocab_size, name='Wemb')
    src_emb = table.apply(src, emb_lstm_range)
    self.layers.append(table)

    # morph lookup table
    emb_morph_range = T.arange(self.n_emb_morph)
    table_morph = LookupTable(self.n_emb_morph, self.morph_size, name='Memb')
    src_morph_emb = table_morph.apply(src_morph, emb_morph_range)
    self.layers.append(table_morph)

    if self.dropout < 1.0:
        src_emb = DropoutLayer(src_emb, use_noise, self.dropout)
        src_morph_emb = DropoutLayer(src_morph_emb, use_noise, self.dropout)

    lstm_att_1st = LstmMorphAttention(self.n_hids, self.n_hids, self.n_hids)
    hiddens, cells = lstm_att_1st.apply(src_emb, src_morph_emb, src_mask)
    self.layers.append(lstm_att_1st)
    # print len(hiddens), hiddens[0].ndim

    rnn_layer_2rd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_2rd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_2rd)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    rnn_layer_3nd = LSTM(self.n_hids, self.n_hids)
    hiddens, cells = rnn_layer_3nd.apply(hiddens, src_mask)
    self.layers.append(rnn_layer_3nd)

    if True:
        maxout = MaxoutLayer()
        # src_emb : sentence * batch * n_emb
        # hiddens : sentence * batch * hids
        states = T.concatenate([src_emb, hiddens], axis=2)
        maxout_n_fold = 2
        hiddens = maxout.apply(states, self.n_emb_lstm + self.n_hids, self.n_hids,
                               src_mask, maxout_n_fold)
        self.layers.append(maxout)

    if self.dropout < 1.0:
        hiddens = DropoutLayer(hiddens, use_noise, self.dropout)

    logistic_layer = LogisticRegression(hiddens, self.n_hids, self.vocab_size)
    self.layers.append(logistic_layer)
    self.cost = logistic_layer.cost(tgt, tgt_mask)

    for layer in self.layers:
        self.params.extend(layer.params)

    self.L2 = sum(T.sum(item ** 2) for item in self.params)
    self.L1 = sum(T.sum(abs(item)) for item in self.params)