def load(args, checkpoint_dir):
    state_dict = torch.load(os.path.join(checkpoint_dir, 'checkpoint.pth'))
    from collections import OrderedDict
    new_state_dict = OrderedDict()
    for k, v in state_dict.items():
        if 'module' in k:
            namekey = k[7:]  # remove `module.`
        else:
            namekey = k
        new_state_dict[namekey] = v
    if args.model_type == 'bert':
        config = BertConfig.from_json_file(os.path.join(checkpoint_dir, 'config.bin'))
        model = BertForSequenceClassification(config)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'cnn':
        model = CNNModel(n_vocab=args.vocab_size,
                         embed_size=args.embed_size,
                         num_classes=args.num_labels,
                         num_filters=args.num_filters,
                         filter_sizes=args.filter_sizes,
                         device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'lstm':
        model = LSTMModel(n_vocab=args.vocab_size,
                          embed_size=args.embed_size,
                          num_classes=args.num_labels,
                          hidden_size=args.hidden_size,
                          device=args.device)
        model.load_state_dict(new_state_dict)
    elif args.model_type == 'char-cnn':
        model = CharCNN(num_features=args.num_features,
                        num_classes=args.num_labels)
        model.load_state_dict(new_state_dict)
    else:
        raise ValueError('model type is not found!')
    return model.to(args.device)
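# A minimal usage sketch for the loader above, assuming `args` is an
# argparse.Namespace carrying model_type, device and the model-specific fields;
# the checkpoint directory path is a placeholder, not taken from the original script.
model = load(args, checkpoint_dir='./checkpoints')
model.eval()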
def start(config):
    global sess
    print(config)
    model = LSTMModel(config)
    sess = tf.InteractiveSession()
    sess.run(tf.global_variables_initializer())
    return train(train_set, valid_set, test_set, model)
def main(args):
    df = pd.read_csv(args.dataset)
    # df = df.iloc[::24, :]

    # Preprocess input and reshape to (num_samples, window_size, 1)
    processor = DataProcessor(window_size=args.window_size,
                              forecast_size=args.forecast,
                              shift=args.shift)
    train_X, train_y, test_X, test_y, raw_series = processor.preprocess(df)

    # Train or load the model
    lstm = LSTMModel(args.window_size, args.forecast)
    print(lstm.model.summary())
    if not args.eval_only:
        lstm.fit(train_X, train_y, epochs=args.epochs)
        lstm.save(args.model_path)
    else:
        lstm.load(args.model_path)

    # Evaluation and plots
    preds = lstm.predict(test_X[-1].reshape(1, -1, 1))
    preds = processor.postprocess(preds)
    plot_test_datapoint(test_X[-1], test_y[-1], preds[0], args.forecast)

    preds_moving = moving_test_window_preds(lstm, test_X[0, :],
                                            n_future_preds=1000,
                                            step=args.forecast)
    preds_moving = np.array(preds_moving).reshape(-1, 1)
    preds_moving = processor.postprocess(preds_moving)
    plot_moving_window(df['datetime'], raw_series, preds_moving)
def lstmTrain(args):
    data_loader = TextLoader('data', batchSize, numSteps)
    args.vocabSize = data_loader.vocab_size
    print args.vocabSize
    _lstmModel = LSTMModel(args)
    with tf.Session() as trainSess:
        tf.initialize_all_variables().run()
        saver = tf.train.Saver(tf.all_variables())
        for currEpoch in xrange(numEpochs):
            # For reading batches of training data.
            currBatchPointer = 0
            # Set the learning rate. Decay after every epoch.
            trainSess.run(tf.assign(_lstmModel.learningRate,
                                    learningRate * decayRate ** currEpoch))
            state = _lstmModel.initialState.eval()
            for currBatch in xrange(numBatches):
                # Set input and target output data for current batch.
                inData = inDataBatches[currBatchPointer]
                targetData = targetDataBatches[currBatchPointer]
                #print inData
                # We will feed the data to the session.
                inputFeed = {
                    _lstmModel.inputData: inData,
                    _lstmModel.targetOutput: targetData,
                    _lstmModel.initialState: state
                }
                trainLoss, state, _ = trainSess.run(
                    [_lstmModel.cost, _lstmModel.final_state, _lstmModel.trainStep],
                    inputFeed)
                print "epoch: {}".format(currEpoch)
                print "trainingLoss: {}".format(trainLoss)
                # Save a checkpoint
                if currEpoch % 5 == 0:
                    checkpointPath = os.path.join(args.save_dir, 'lstmModel.ckpt')
                    saver.save(trainSess, checkpointPath,
                               global_step=currEpoch * numBatches + currBatch)
                    print "Saving checkpoint to {}".format(checkpointPath)
                # Advance the batch pointer.
                currBatchPointer += 1
def load_model(path_to_state_dict: str, path_to_config_file: str):
    with open(path_to_config_file, 'r') as file:
        config_file = json.load(file)
    Model = LSTMModel(**config_file)
    Model.load_state_dict(torch.load(path_to_state_dict))
    Model.eval()  # LN in eval mode
    return Model, config_file
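# A minimal inference sketch for load_model, assuming the files were produced by
# the training script further down (state_dict.pt / config_model.json in its
# cache dir); the paths and the dummy input are illustrative only.
model, config = load_model('./cache/state_dict.pt', './cache/config_model.json')
with torch.no_grad():
    logits = model(torch.zeros(1, config['max_length'], dtype=torch.long))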
def run_train(train_df):
    # init fastText embedding weights
    Main.fasttext_embedding_init()
    model_obj = LSTMModel(max_sentence_size=Params.max_sentence_size,
                          embed_size=Params.embed_size,
                          vocab_size=len(Params.sentence_tokenizer.word_index) + 1,
                          lstm_units=Params.lstm_units,
                          dense_size=Params.dense_size,
                          label_size=Params.label_size)
    model = model_obj.get_model()

    reduce_lr = tf.keras.callbacks.ReduceLROnPlateau(monitor='val_loss',
                                                     factor=Params.ReduceLROnPlateau_factor,
                                                     patience=Params.ReduceLROnPlateau_patience,
                                                     min_lr=Params.ReduceLROnPlateau_min_lr)
    if Params.optimizer == "sgd":
        optimizer = tf.keras.optimizers.SGD(learning_rate=Params.lr)
    elif Params.optimizer == "adam":
        optimizer = tf.keras.optimizers.Adam(learning_rate=Params.lr, beta_1=0.9, beta_2=0.999)
    model.compile(optimizer=optimizer, loss="mean_squared_error", metrics=["accuracy"])

    print("------------model summary-------------")
    print(model.summary())

    # split train-valid
    # validation_split=Params.validation_split takes the last x% of the dataset
    # as the validation set, which is the wrong approach here!
    train, valid = train_test_split(train_df,
                                    stratify=train_df[["duplicate"]],
                                    test_size=Params.test_size,
                                    random_state=Params.random_state)

    my_callbacks = [
        tf.keras.callbacks.EarlyStopping(monitor="val_loss", patience=Params.early_stop_patience),
        reduce_lr
    ]

    history = model.fit([np.array(train["q1"].tolist()), np.array(train["q2"].tolist())],
                        np.array(train["duplicate"].tolist()),
                        batch_size=Params.batch_size,
                        epochs=Params.epoch,
                        validation_data=([np.array(valid["q1"].tolist()), np.array(valid["q2"].tolist())],
                                         np.array(valid["duplicate"].tolist())),
                        verbose=1,
                        shuffle=True,
                        callbacks=my_callbacks)
    model.save(os.path.join(Params.model_dir, "model.h5"))

    print("-------history---------")
    print(history.history)
    Main.plot(history)

    return model
def training_model(train_data, test_data, num_epochs, batch_size=8, input_dim=1,
                   hidden_dim=100, output_dim=100, seq_dim=7):
    train_loader = DataLoader(dataset=train_data, batch_size=batch_size,
                              shuffle=True, drop_last=True)
    test_loader = DataLoader(dataset=test_data, batch_size=batch_size,
                             shuffle=True, drop_last=True)
    Mymodel = LSTMModel(input_dim, hidden_dim, 1, output_dim)
    loss_function = nn.MSELoss()
    optimizer = torch.optim.Adam(Mymodel.parameters(), lr=0.0001)

    iters = 0
    hisloss = []
    for epoch in range(num_epochs):
        for data_val, target in train_loader:
            # Clear the previous gradients.
            optimizer.zero_grad()
            outputs = Mymodel(data_val)
            # Calculate the loss.
            loss = loss_function(outputs, target)
            hisloss.append(loss.item())
            # Backpropagate to compute gradients, stored in the model.
            loss.backward()
            # Use the gradients to update the model parameters.
            optimizer.step()
            iters += 1
            if iters % 300 == 0:
                for test_val, test_target in test_loader:
                    test_outputs = Mymodel(test_val)
                    loss2 = loss_function(test_outputs, test_target)
                print('Iteration: {}. TrainLoss: {}. TestLoss: {}'.format(
                    iters, loss.item(), loss2.item()))
                torch.save(Mymodel.state_dict(),
                           'Trained_model/trained_model_' + str(iters) + '.pkl')

    plt.plot(hisloss)
    plt.xlabel('Iteration')
    plt.ylabel('Training loss')
    plt.title('Training process')
    plt.grid(True)
    plt.savefig('Trained_model/loss.png')
    return Mymodel
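# A minimal call sketch for training_model, assuming `train_set` and `test_set`
# are torch Datasets yielding (sequence, target) float tensors compatible with
# the model's input_dim of 1; the names and epoch count are illustrative.
trained_model = training_model(train_set, test_set, num_epochs=10, batch_size=8)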
def __init__(self, t=DEFAULT_CONSEC_FRAMES):
    print("fall detector init")
    start_time = time.time()
    self.consecutive_frames = t
    self.args = self.cli()
    argss = [copy.deepcopy(self.args) for _ in range(self.args.num_cams)]
    self.model = LSTMModel(h_RNN=32, h_RNN_layers=2, drop_p=0.2, num_classes=7)
    self.model.load_state_dict(torch.load('lstm2.sav', map_location=argss[0].device))
    print("Model Loaded")
    print("Model loaded in time: " + str(time.time() - start_time))
def test_lstm(data_size: float, epoch: int, batch_size: int = 64):
    """
    Evaluate the LSTM model on the DSTC2 data
    :param data_size: size of data sliced
    :param epoch: number of epochs to train the model
    :param batch_size: batch size for each training
    :return:
    """
    training_data, training_labels = DSTC2.trainset(500).word_vecs(raw_label=True)
    model = LSTMModel(training_data, training_labels, max_feature_length=50)
    model.verbose = 1
    model.train(data_size, epoch, batch_size)
    testing_data, testing_labels = DSTC2.testset(500).word_vecs(raw_label=True)
    return 'ld', data_size, epoch, model.predict(testing_data, testing_labels)
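# A minimal call sketch for test_lstm; the data_size and epoch values are
# illustrative, not taken from the original experiments.
print(test_lstm(data_size=1.0, epoch=5, batch_size=64))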
def main():
    X_train = load_X(X_train_signals_paths)
    X_test = load_X(X_test_signals_paths)
    y_train = load_y(y_train_path)
    y_test = load_y(y_test_path)

    # Input Data
    training_data_count = len(X_train)  # 7352 training series (with 50% overlap between each series)
    test_data_count = len(X_test)  # 2947 testing series
    n_steps = len(X_train[0])  # 128 timesteps per series
    n_input = len(X_train[0][0])  # 9 input parameters per timestep

    # Some debugging info
    print("Some useful info to get an insight on dataset's shape and normalisation:")
    print("(X shape, y shape, every X's mean, every X's standard deviation)")
    print(X_test.shape, y_test.shape, np.mean(X_test), np.std(X_test))
    print("The dataset is therefore properly normalised, as expected, but not yet one-hot encoded.")

    for lr in learning_rate:
        arch = cfg.arch
        if arch['name'] == 'LSTM1' or arch['name'] == 'LSTM2':
            net = LSTMModel()
        elif arch['name'] == 'Res_LSTM':
            net = Res_LSTMModel()
        elif arch['name'] == 'Res_Bidir_LSTM':
            net = Res_Bidir_LSTMModel()
        elif arch['name'] == 'Bidir_LSTM1' or arch['name'] == 'Bidir_LSTM2':
            net = Bidir_LSTMModel()
        else:
            print("Incorrect architecture chosen. Please check the architecture given in config.py. Program will exit now! :(")
            sys.exit()
        net.apply(init_weights)
        print(diag)
        opt = torch.optim.Adam(net.parameters(), lr=lr)
        criterion = nn.CrossEntropyLoss()
        net = net.float()
        params = train(net, X_train, y_train, X_test, y_test, opt=opt,
                       criterion=criterion, epochs=epochs, clip_val=clip_val)
        evaluate(params['best_model'], X_test, y_test, criterion)
        plot(params['epochs'], params['train_loss'], params['test_loss'], 'loss', lr)
        plot(params['epochs'], params['train_accuracy'], params['test_accuracy'], 'accuracy', lr)
def main(_):
    vocabulary = Vocabulary()
    vocabulary.load_vocab(FLAGS.vocab_file)
    if os.path.isdir(FLAGS.checkpoint_path):
        FLAGS.checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_path)
    model = LSTMModel(vocabulary.vocab_size,
                      sampling=True,
                      lstm_size=FLAGS.lstm_size,
                      num_layers=FLAGS.num_layers,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)
    model.load(FLAGS.checkpoint_path)
    start = vocabulary.encode(FLAGS.start_string)
    arr = model.predict(FLAGS.max_length, start, vocabulary.vocab_size)
    print(vocabulary.decode(arr))
def test_lstm_reuters(data_size: float, epoch: int, batch_size: int = 64):
    """
    Evaluate the LSTM model on the Reuters data
    :param data_size: size of data sliced
    :param epoch: number of epochs to train the model
    :param batch_size: batch size for each training
    :return:
    """
    reuters = Reuters(num_words=500, maxlen=500)
    training_data, training_labels = reuters.training_set()
    testing_data, testing_labels = reuters.testing_set()
    model = LSTMModel(training_data, training_labels, max_feature_length=500, top_words=5000)
    model.verbose = 1
    model.train(data_size, epoch, batch_size)
    return 'lr', data_size, epoch, model.predict(testing_data, testing_labels)
def main():
    # create instance of config
    config = Config()
    # create instance of datadeal
    datadeal = DataDeal(config)
    # get input data
    train_in, train_out, valid_in, valid_out, scaler, valid_x = datadeal.data_Deal()
    # create instance of model
    model = LSTMModel(config)
    # get result
    pre_value, act_value = model.run_Session(train_in, train_out, valid_in, valid_out)
    # model.run_Session(train_in, train_out, valid_in, valid_out)
    # calculate RMSE
    value_pre, value_real = datadeal.inv_Scale(scaler=scaler, valid_x=valid_x,
                                               data_pre=pre_value, data_act=act_value)
    # save result
    with open(config.dir_output + "result_2.txt", "w") as f:
        f.write("PRE" + "\t" + "ACT" + "\n")
        for i in range(len(value_pre)):
            f.write(str(int(value_pre[i])) + "\t" + str(int(value_real[i])) + "\n")
def evaluation(provincename, cityname, modelpath, data):
    lat, long = get_location_using_baidu(provincename + cityname)
    Mymodel = LSTMModel(1, 100, 1, 100)
    Mymodel.load_state_dict(torch.load(modelpath))
    series = data.loc[(data["provinceName"] == provincename) &
                      (data["cityName"] == cityname), "ts"].values.tolist()
    series = np.reshape(series, (-1, 3))
    if np.isnan(series[-1][0]):
        series = series[:-1]
    diff_series = np.diff(series, axis=0)
    n = len(diff_series)
    predict_series = np.array(series[0:7, 0:1])
    store_diff = np.array(diff_series[0:7, 0:1])
    for i in range(n - 7):
        seq = np.array(diff_series[i:i + 7])
        total_recover = np.sum(seq[:, 1])
        total_death = np.sum(seq[:, 2])
        seq = seq[:, 0:1]
        mean = np.mean(seq[:, 0], axis=0)
        std = np.std(seq[:, 0], axis=0)
        seq -= mean
        if std != 0:
            seq /= std
        tensor_seq = torch.tensor(seq, dtype=torch.float, requires_grad=False)
        add_seq = torch.tensor([[lat], [long], [total_recover], [total_death], [mean], [std]])
        tensor_seq = torch.cat([tensor_seq, add_seq])
        tensor_seq.resize_(1, 13, 1)
        predictions = np.array(Mymodel(tensor_seq).tolist()[0])
        real_diff = predictions * std + mean
        store_diff = np.append(store_diff, [real_diff], axis=0)
        if i >= n - 7:
            print(diff_series)
            print([real_diff[0], 0, 0])
            diff_series = np.append(diff_series, [real_diff[0], 0, 0], axis=0)
            predict_series = np.append(predict_series,
                                       [np.array(list(map(sum, zip(predict_series[-1], real_diff))))],
                                       axis=0)
        else:
            predict_series = np.append(predict_series,
                                       [np.array(list(map(sum, zip(series[i + 6][0:1], real_diff))))],
                                       axis=0)
    return series, predict_series, provincename + cityname, store_diff, diff_series
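# A minimal call sketch for evaluation, assuming `data` is a DataFrame with
# 'provinceName', 'cityName' and 'ts' columns and that the model path points at
# a state dict saved by training_model(); the names and path are placeholders.
series, predict_series, label, store_diff, diff_series = evaluation(
    "SomeProvince", "SomeCity", "Trained_model/trained_model_300.pkl", data)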
def main(_):
    if os.path.exists(checkpoint_path) is False:
        os.makedirs(checkpoint_path)
    # Read the training text.
    with open(datafile, 'r', encoding='utf-8') as f:
        train_data = f.read()
    # Load or build the vocabulary.
    vocabulary = Vocabulary()
    if FLAGS.vocab_file:
        vocabulary.load_vocab(FLAGS.vocab_file)
    else:
        vocabulary.build_vocab(train_data)
        vocabulary.save(FLAGS.vocab_file)
    input_ids = vocabulary.encode(train_data)
    g = batch_generator(input_ids, FLAGS.batch_size, FLAGS.num_steps)
    model = LSTMModel(vocabulary.vocab_size,
                      batch_size=FLAGS.batch_size,
                      num_steps=FLAGS.num_steps,
                      lstm_size=FLAGS.lstm_size,
                      num_layers=FLAGS.num_layers,
                      learning_rate=FLAGS.learning_rate,
                      train_keep_prob=FLAGS.train_keep_prob,
                      use_embedding=FLAGS.use_embedding,
                      embedding_size=FLAGS.embedding_size)
    model.train(
        g,
        FLAGS.max_steps,
        checkpoint_path,
        FLAGS.save_every_n,
        FLAGS.log_every_n,
    )
    print('Basic Dialog RNN Model.')
elif args.base_model == 'GRU':
    model = GRUModel(D_m, D_e, D_h, n_classes=n_classes, dropout=args.dropout)
    print('Basic GRU Model.')
elif args.base_model == 'LSTM':
    model = LSTMModel(D_m, D_e, D_h, n_classes=n_classes, dropout=args.dropout)
    print('Basic LSTM Model.')
else:
    print('Base model must be one of DialogRNN/LSTM/GRU/Transformer')
    raise NotImplementedError

name = 'Base'

if cuda:
    model.cuda()

# for daily_dialog_bert2.pkl
                                             shuffle=True,
                                             num_workers=args.workers,
                                             pin_memory=True)
val_loader = torch.utils.data.DataLoader(val_dataset,
                                         batch_size=args.batch_size,
                                         shuffle=False,
                                         num_workers=args.workers,
                                         pin_memory=True)

if os.path.exists(os.path.join(args.model, 'checkpoint.pth.tar')):
    # load existing model
    model_info = torch.load(os.path.join(args.model, 'checkpoint.pth.tar'))
    print("==> loading existing model '{}' ".format(model_info['arch']))
    original_model = models.__dict__[model_info['arch']](pretrained=False)
    model = LSTMModel(original_model, model_info['arch'],
                      model_info['num_classes'], model_info['lstm_layers'],
                      model_info['hidden_size'], model_info['fc_size'])
    # print(model)
    model.cuda()
    model.load_state_dict(model_info['state_dict'])
    best_prec = model_info['best_prec']
    cur_epoch = model_info['epoch']
else:
    if not os.path.isdir(args.model):
        os.makedirs(args.model)
    # load and create model
    print("==> creating model '{}' ".format(args.arch))
    original_model = models.__dict__[args.arch](pretrained=True)
    model = LSTMModel(original_model, args.arch, len(train_dataset.classes),
                      args.lstm_layers, args.hidden_size, args.fc_size)
def __init__(self, env, obs_space, action_space, ignoreLTL, gnn_type, dumb_ac, freeze_ltl):
    super().__init__()

    # Decide which components are enabled
    self.use_progression_info = "progress_info" in obs_space
    self.use_text = not ignoreLTL and (gnn_type == "GRU" or gnn_type == "LSTM") and "text" in obs_space
    self.use_ast = not ignoreLTL and ("GCN" in gnn_type) and "text" in obs_space
    self.gnn_type = gnn_type
    self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    self.action_space = action_space
    self.dumb_ac = dumb_ac

    self.freeze_pretrained_params = freeze_ltl
    if self.freeze_pretrained_params:
        print("Freezing the LTL module.")

    self.env_model = getEnvModel(env, obs_space)

    # Define text embedding
    if self.use_progression_info:
        self.text_embedding_size = 32
        self.simple_encoder = nn.Sequential(
            nn.Linear(obs_space["progress_info"], 64),
            nn.Tanh(),
            nn.Linear(64, self.text_embedding_size),
            nn.Tanh()).to(self.device)
        print("Linear encoder Number of parameters:",
              sum(p.numel() for p in self.simple_encoder.parameters() if p.requires_grad))
    elif self.use_text:
        self.word_embedding_size = 32
        self.text_embedding_size = 32
        if self.gnn_type == "GRU":
            self.text_rnn = GRUModel(obs_space["text"], self.word_embedding_size,
                                     16, self.text_embedding_size).to(self.device)
        else:
            assert (self.gnn_type == "LSTM")
            self.text_rnn = LSTMModel(obs_space["text"], self.word_embedding_size,
                                      16, self.text_embedding_size).to(self.device)
        print("RNN Number of parameters:",
              sum(p.numel() for p in self.text_rnn.parameters() if p.requires_grad))
    elif self.use_ast:
        hidden_dim = 32
        self.text_embedding_size = 32
        self.gnn = GNNMaker(self.gnn_type, obs_space["text"],
                            self.text_embedding_size).to(self.device)
        print("GNN Number of parameters:",
              sum(p.numel() for p in self.gnn.parameters() if p.requires_grad))

    # Memory specific code.
    self.image_embedding_size = self.env_model.size()
    self.memory_rnn = nn.LSTMCell(self.image_embedding_size, self.semi_memory_size)
    self.embedding_size = self.semi_memory_size
    print("embedding size:", self.embedding_size)
    if self.use_text or self.use_ast or self.use_progression_info:
        self.embedding_size += self.text_embedding_size

    if self.dumb_ac:
        # Define actor's model
        self.actor = PolicyNetwork(self.embedding_size, self.action_space)
        # Define critic's model
        self.critic = nn.Sequential(nn.Linear(self.embedding_size, 1))
    else:
        # Define actor's model
        self.actor = PolicyNetwork(self.embedding_size, self.action_space,
                                   hiddens=[64, 64, 64], activation=nn.ReLU())
        # Define critic's model
        self.critic = nn.Sequential(nn.Linear(self.embedding_size, 64), nn.Tanh(),
                                    nn.Linear(64, 64), nn.Tanh(),
                                    nn.Linear(64, 1))

    # Initialize parameters correctly
    self.apply(init_params)
train_data = Corpus('data/train.dat')
valid_data = Corpus('data/valid.dat')
test_data = Corpus('data/test.dat')

# =================================================
# Build model
# =================================================
print('=' * 89)
print('building model...')
print('=' * 89)
n = len(corpus.vocab)
# model = RNNModel(args.model, n, EMBED_SIZE, NUM_HID, NUM_LAY)
model = LSTMModel(n, EMBED_SIZE, NUM_HID, glove_path, corpus.word2idx)
if args.cuda:
    model.cuda()
# criterion = nn.CrossEntropyLoss()

# =================================================
# Utility functions
# =================================================
# *** do not use ***
eval_batch_size = 10
# train_data = batchify(corpus.train, BATCH_SIZE)
# valid_data = batchify(corpus.valid, eval_batch_size)
# test_data = batchify(corpus.test, eval_batch_size)
# -*- coding: utf-8 -*-
# @Author: LogicJake
# @Date: 2018-11-13 19:02:55
# @Last Modified time: 2018-11-18 20:46:42
from preprocessing import Preprocessing
from model import LSTMModel

if __name__ == '__main__':
    preprocessing = Preprocessing('../../dataset/input.csv',
                                  '../../dataset/output.csv')
    preprocessing.reformat()

    model = LSTMModel()
    model.train()
def train(trainX, trainY, epoch, lr, batchSize, modelPath, lookBack, method):
    lossFilePath = "../model/loss_ResRNN-4.pkl"
    output = open(lossFilePath, 'wb')
    lossList = []

    n = trainX.shape[0]
    print("trainx num is:", n)
    batchNum = n // batchSize - 1
    print("batch num is:", batchNum)

    if method == "RNN":
        net = RNNModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1, cell="RNN")
    if method == "LSTM":
        net = LSTMModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1, cell="LSTM")
    if method == "GRU":
        net = GRUModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1, cell="GRU")
    if method == "ResRNN":
        # net = ResidualRNNModel(inputDim=1, hiddenNum=100, outputDim=1, layerNum=1, cell="RNNCell")
        net = ResRNNModel(inputDim=1, hiddenNum=100, outputDim=1, resDepth=-1)
    if method == "attention":
        net = AttentionRNNModel(inputDim=1, hiddenNum=100, outputDim=1, seqLen=lookBack)
    if method == "ANN":
        net = ANNModel(inputDim=lookBack, hiddenNum=100, outputDim=1)
    if method == "new":
        net = DecompositionNetModel(inputDim=lookBack, fchiddenNum=100,
                                    rnnhiddenNum=100, outputDim=1)

    optimizer = optim.RMSprop(net.parameters(), lr=lr, momentum=0.9)
    # scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min')
    # optimizer = optim.SGD(net.parameters(), lr=0.001)

    t1 = time.time()
    for i in range(epoch):
        trainX, trainY = shuffle(trainX, trainY, random_state=epoch)
        batchStart = 0
        lossSum = 0
        for j in range(batchNum):
            x = trainX[batchStart:batchStart + batchSize, :, :]
            y = trainY[batchStart:batchStart + batchSize]

            x = torch.from_numpy(x)
            y = torch.from_numpy(y)
            x, y = Variable(x), Variable(y)

            optimizer.zero_grad()

            if method == "new":
                pred = net.forward(x, batchSize=batchSize)
                # criterion = nn.MSELoss()
                # loss = criterion(pred, y)
                loss = MSE_Loss(pred, y)
            else:
                pred = net.forward(x, batchSize=batchSize)
                criterion = nn.MSELoss()
                loss = criterion(pred, y)

            lossSum += loss.data.numpy()[0]
            if j % 30 == 0 and j != 0:
                print("current loss is:", lossSum / 10)
                lossList.append(lossSum / 10)
                lossSum = 0

            # net.zero_grad()
            loss.backward()
            optimizer.step()
            # scheduler.step(loss)

            batchStart += batchSize
        print("%d epoch is finished!" % i)
    t2 = time.time()
    print("train time:", t2 - t1)
    p.dump(lossList, output, -1)
    torch.save(net, modelPath)
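# A minimal call sketch for train, assuming trainX is a float32 array of shape
# (N, lookBack, 1) and trainY of shape (N, 1); the hyperparameter values and
# model path are illustrative, not taken from the original experiments.
train(trainX, trainY, epoch=20, lr=1e-3, batchSize=32,
      modelPath="../model/lstm_model.pkl", lookBack=24, method="LSTM")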
max_step = 20000
# Directory where the checkpoints will be saved
checkpoint_dir = './training_checkpoints'
# Name of the checkpoint files
checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{step}")

input_ids = vocabulary.encode(train_data)
batch_data = batch_generator(input_ids, batch_size, seq_len)

model = LSTMModel(vocabulary.vocab_size,
                  batch_size=batch_size,
                  num_steps=seq_len,
                  lstm_size=128,
                  num_layers=2,
                  sampling=False,
                  drop_out=0.5,
                  use_embedding=False,
                  embedding_size=128)

optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
loss_fn = tf.losses.SparseCategoricalCrossentropy(from_logits=True)

# define metrics
train_loss = tf.keras.metrics.Mean(name='train_loss')

step = 0
for nb, (X, y) in enumerate(batch_data):
    start = time.time()
    train_loss.reset_states()
    step += 1
# Constructs the network.
network = args.network.lower()
vocab_size = len(vocab)
num_classes = len(train_ds.label_list)
pad_token_id = vocab.to_indices('[PAD]')
if network == 'bow':
    model = BoWModel(vocab_size, num_classes, padding_idx=pad_token_id)
elif network == 'bigru':
    model = GRUModel(vocab_size, num_classes, direction='bidirect', padding_idx=pad_token_id)
elif network == 'bilstm':
    model = LSTMModel(vocab_size, num_classes, direction='bidirect', padding_idx=pad_token_id)
elif network == 'bilstm_attn':
    lstm_hidden_size = 196
    attention = SelfInteractiveAttention(hidden_size=2 * lstm_hidden_size)
    model = BiLSTMAttentionModel(attention_layer=attention,
                                 vocab_size=vocab_size,
                                 lstm_hidden_size=lstm_hidden_size,
                                 num_classes=num_classes,
                                 padding_idx=pad_token_id)
elif network == 'birnn':
    model = RNNModel(vocab_size, num_classes, direction='bidirect', padding_idx=pad_token_id)
elif network == 'cnn':
def main(path_to_data: str, cache_dir: str, texts_col: str, labels_col: str,
         n_classes: int, batch_size: int, batch_size_eval: int, min_lr: float,
         max_lr: float, n_epochs: int, cuda: int = 0):
    '''
    '''
    df = pd.read_csv(path_to_data)

    if os.path.isdir(cache_dir):
        logger.info('Cache dir found here {}'.format(cache_dir))
    else:
        logger.info('Creating cache dir')
        os.mkdir(cache_dir)

    # Preprocess
    optimal_length = get_length(df, texts_col)
    X, vocab_size = encode_texts(df, texts_col,
                                 max_seq_length=optimal_length,
                                 return_vocab_size=True)
    y = get_labels(df, labels_col, n_classes)
    train_loader, test_loader = create_TorchLoaders(X, y, test_size=0.10,
                                                    batch_size=batch_size,
                                                    batch_size_eval=batch_size_eval)

    Model = LSTMModel(vocab_size=vocab_size, n_classes=n_classes)
    config_dict = {
        "vocab_size": vocab_size,
        "n_classes": n_classes,
        "max_length": optimal_length
    }

    if n_classes > 2:
        criterion = torch.nn.CrossEntropyLoss()
    else:
        criterion = torch.nn.BCEWithLogitsLoss()

    optim = torch.optim.Adam(Model.parameters())
    ## Heuristic
    opt_cycle = ((((len(X) * (1 - 0.10)) / batch_size) * n_epochs) * 0.25) / 2
    schedul = torch.optim.lr_scheduler.CyclicLR(optim, min_lr, max_lr,
                                                step_size_up=opt_cycle,
                                                step_size_down=opt_cycle,
                                                mode="exp_range",
                                                cycle_momentum=False,
                                                gamma=0.999)
    if cuda == 1:
        Model.cuda()
        device = "cuda"
    else:
        device = "cpu"

    metrics = {
        "training_loss": [],
        "eval_loss": [],
        "training_f1": [],
        "eval_f1": []
    }

    logger.info("Starting training for {} epochs".format(n_epochs))
    for epoch in range(n_epochs):
        Model.train()
        progress = progressbar.ProgressBar()
        for batch in progress(train_loader):
            batch = tuple(t for t in batch)
            inputs, labels = batch  # unpacking
            inputs = inputs.to(device, dtype=torch.long)
            labels = labels.to(device, dtype=torch.float)

            preds = Model(inputs)
            loss = criterion(preds, labels)

            ## Metrics computation
            metrics["training_loss"].append(loss.item())
            preds = preds.to("cpu").detach().numpy()
            preds = flat_pred(preds, 0.5)
            tmp_f1 = f1_score(labels.to("cpu").detach().numpy(), preds, average='macro')
            metrics["training_f1"].append(tmp_f1)

            ## Backward pass ##
            loss.backward()
            optim.step()  # Gradient descent
            schedul.step()
            Model.zero_grad()

        logger.info("Epoch {} done with: training loss: {}\n training f1: {}".format(
            epoch, loss.item(), tmp_f1))

        ## Eval
        progress = progressbar.ProgressBar()
        Model.eval()
        for batch in progress(test_loader):
            with torch.no_grad():  # computationally efficient
                batch = tuple(t for t in batch)
                inputs, labels = batch
                inputs = inputs.to(device, dtype=torch.long)
                labels = labels.to(device, dtype=torch.float)

                preds = Model(inputs)
                eval_loss = criterion(preds, labels)

                ## Eval metrics
                metrics["eval_loss"].append(eval_loss.item())
                preds = preds.to("cpu").detach().numpy()
                preds = flat_pred(preds, 0.5)
                tmp_f1 = f1_score(labels.to("cpu").detach().numpy(), preds, average='macro')  ## detach
                metrics["eval_f1"].append(tmp_f1)

        logger.info("Evaluation at iteration {} done: eval loss: {}\n eval f1: {}".format(
            epoch, eval_loss.item(), tmp_f1))

    ## Bring back model to cpu
    Model.cpu()

    ## Get/Save param dict
    logger.info('Saving model in cache dir {}'.format(cache_dir))
    torch.save(Model.state_dict(), os.path.join(cache_dir, 'state_dict.pt'))
    with open(os.path.join(cache_dir, 'config_model.json'), 'w') as file:
        json.dump(config_dict, file)
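# A minimal call sketch for main above, assuming a CSV with 'text' and 'label'
# columns; the file name, batch sizes and learning-rate bounds are placeholders.
main('reviews.csv', './cache', texts_col='text', labels_col='label',
     n_classes=2, batch_size=32, batch_size_eval=64,
     min_lr=1e-5, max_lr=1e-3, n_epochs=3, cuda=0)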
test_loader = DataLoader(
    test_dataset,
    batch_size=32,
    shuffle=False,
    num_workers=4,
    collate_fn=lstm_collate_fn,
)

###############################################################################

encoder_cnn = EncoderCNN(emb_size)
encoder_cnn = encoder_cnn.to(device)

if model == "lstm":
    f_rnn = LSTMModel(emb_size, emb_size, emb_size, device, bidirectional=False)
    b_rnn = LSTMModel(emb_size, emb_size, emb_size, device, bidirectional=False)
    f_rnn = f_rnn.to(device)
    b_rnn = b_rnn.to(device)

criterion = nn.CrossEntropyLoss()
params_to_train = (list(encoder_cnn.parameters()) +
                   list(f_rnn.parameters()) +
                   list(b_rnn.parameters()))
optimizer = torch.optim.SGD(params_to_train, lr=2e-1, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.5)
# Check whether the graphics card can run CUDA operations.
if using_gpu:
    device = "cuda:2"
    print("Training on GPU")
else:
    device = "cpu"
    print("Training on CPU")

os.path.dirname(os.path.abspath(__file__))

# Hyperparameters for the model:
# input neurons, hidden size, output neurons, layers, directions, dropout
modelparameters = [34, 128, 35, 2, 1, 0.2]

# Create the model.
model = LSTMModel(modelparameters[0], modelparameters[1], modelparameters[2],
                  modelparameters[3], modelparameters[4], modelparameters[5])
model.to(device)

# Hyperparameters of the training
batch_size = 6
valid_batch_size = 11
epochs = 500
learning_rate = 0.001
print_every = 5
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

home_directory = os.path.dirname(os.path.abspath(__file__))
training_path = ""
                    help='number of output units for model (default: 30)')
parser.add_argument('--seed', type=int, default=1111,
                    help='random seed (default: 1111)')
parser.add_argument('--model_type', type=str, default='none',
                    help='model type to execute (default: none, pass VRAE for executing)')
args = parser.parse_args()

curr_time = strftime("%Y%m%d%H%M%S", localtime())
# args.cuda = torch.cuda.is_available()

# initialize model and params
if args.model == 'LSTM':
    model = LSTMModel(cuda=args.cuda)
elif args.model == 'TCN':
    channel_sizes = [args.nhid] * args.levels
    model = TCNModel(args.nhid, args.opsize, channel_sizes, args.ksize,
                     args.dropout, 128, use_cuda=args.cuda)
elif args.model == 'SOCIAL':
    model = Social_Model(cuda=args.cuda)
elif args.model == 'VRAE':
    model = VRAE(sequence_length=30, number_of_features=2, block='GRU')

if args.mode == 'train':
    logger_dir = './runs/' + args.model + '/' + curr_time + '/'
    model_dir = './models/' + args.model + '/' + curr_time + '/'
    os.makedirs(model_dir)
else:
    logger_dir = None
    model_dir = args.model_dir
print("Creating dictionary...")
dictionary = Dictionary(query_files)
with open("./saved/dictionary.pkl", "wb") as f:
    pickle.dump(dictionary, f)
nchar = len(dictionary)
max_seq_len = dictionary.max_seq_len

lr = args.lr
clip = args.clip
batch_size = args.batch_size
eval_batch_size = 10
best_val_loss = None

if args.model == 'LSTM':
    model = LSTMModel(nchar, args.nhid, args.nlayers, max_seq_len, args.dropout)
    if args.load_latest:
        latest = max([f for f in os.listdir("./saved/lstm")])
        latest_path = os.path.join("./saved/lstm", latest)
        # load_state_dict modifies the model in place; don't rebind `model` to its return value
        model.load_state_dict(torch.load(latest_path))

model = model.to(device)
save(model, args.save)

criterion = nn.NLLLoss(ignore_index=0)
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

# At any point you can hit Ctrl + C to break out of training early.
try:
    print("Start training...")
    for epoch in tqdm(range(1, args.epochs + 1)):
import torch

from dataset import SquaresDataset
from model import LSTMModel
import train_test_old
import train_test

FRAME_WIDTH = 10
LSTM_INPUT_SIZE = 50
LSTM_HIDDEN_SIZE = 32
BATCH_SIZE = 32

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

model = LSTMModel(LSTM_INPUT_SIZE, LSTM_HIDDEN_SIZE)
model = model.to(device)

squares_dataset_train = SquaresDataset(frame_width=FRAME_WIDTH, n=1000)
squares_dataset_test = SquaresDataset(frame_width=FRAME_WIDTH, n=100)

xtion1_train_loader = torch.utils.data.DataLoader(squares_dataset_train, batch_size=BATCH_SIZE)
xtion1_test_loader = torch.utils.data.DataLoader(squares_dataset_test, batch_size=BATCH_SIZE)

train_test.train(model, xtion1_train_loader, xtion1_test_loader, n_classes=2, epochs=2)
target_pl_train_X, target_pl_train_Y, target_pl_train_bi_X, target_pl_train_weight = utils_data.get_zx_pl_data()
init_embedding = utils_data.get_embedding(FLAGS.target)
# init_bi_embedding = utils_data.get_bi_embedding(FLAGS.target)

tfConfig = tf.ConfigProto()
tfConfig.gpu_options.per_process_gpu_memory_fraction = FLAGS.memory
with tf.Graph().as_default(), tf.Session(config=tfConfig) as sess:
    if FLAGS.target == "zx":
        vocab_size = 4704
        bi_vocab_size = 250734
    if FLAGS.model == "lstm":
        m = LSTMModel(config.hidden_size, config.max_grad_norm, config.num_layers,
                      vocab_size, config.embedding_size, config.num_classes,
                      config.learning_rate, config.bi_direction, init_embedding)
    elif FLAGS.model == "lstmlm":
        m = LSTMLMModel(config.hidden_size, config.max_grad_norm, config.num_layers,
                        vocab_size, config.embedding_size, config.num_classes,
                        config.learning_rate, config.bi_direction, init_embedding)
    sess.run(tf.global_variables_initializer())
    best_valid_f1 = 0.
    model_path = "model/%s_%s_%s_%s_%s.ckpt" % (FLAGS.model, FLAGS.source,
                                                FLAGS.target, str(FLAGS.pl), FLAGS.name)
    saver = tf.train.Saver()
    # saver.restore(sess, model_path)
    for epoch in range(config.max_epochs):