def train(data_loader, model, epoch_num, batch_size):
    # unpack the train/test split
    xtrain, ytrain, xtest, ytest = data_loader
    # training bar
    process_bar = sqdm()
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        for xdata, ydata in data_iter(batch_size, xtrain, ytrain):
            model.fit(xdata, ydata)
            # train metrics
            train_pred = model.predict_prob(xdata)
            train_loss = model.entropy_loss(train_pred,
                                            ydata.reshape(train_pred.shape))
            train_acc = model.score(xdata, ydata)
            # test metrics
            test_pred = model.predict_prob(xtest)
            test_loss = model.entropy_loss(test_pred,
                                           ytest.reshape(test_pred.shape))
            test_acc = model.score(xtest, ytest)
            process_bar.show_process(len(ytrain), batch_size,
                                     train_loss=train_loss,
                                     test_loss=test_loss,
                                     train_score=train_acc,
                                     test_score=test_acc)
        print("\n")
    return model
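# A minimal usage sketch for the classification `train` above, assuming a
# model that exposes fit / predict_prob / entropy_loss / score and a NumPy
# train/test split; `SomeClassifier` is a hypothetical placeholder.
#
#   xtrain = np.random.normal(size=(800, 5))
#   ytrain = np.random.randint(0, 2, size=800)
#   xtest = np.random.normal(size=(200, 5))
#   ytest = np.random.randint(0, 2, size=200)
#   clf = SomeClassifier()  # hypothetical model class
#   clf = train((xtrain, ytrain, xtest, ytest), clf,
#               epoch_num=10, batch_size=64)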
def train(x, y, model, epoch_num, batch_size, alpha, weight_decay):
    # regression variant; the signature is reconstructed from the call site
    # below, and it relies on the module-level `process_bar`
    # build the model from its class (constructor signature assumed)
    model = model(alpha=alpha, weight_decay=weight_decay)
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        for xdata, ydata in data_iter(batch_size, x, y):
            model.fit(xdata, ydata)
            mse = model.score(xdata, ydata)
            process_bar.show_process(len(y), batch_size, mse)
        print("\n")
    return model


"""generate data"""
input_num = 10000
true_w = np.array([2, -3.2])
true_b = np.array([4.2])
x = np.random.normal(0, 1, size=(input_num, len(true_w)))
error = np.random.normal(0, 0.01, size=input_num)
y = x @ true_w + true_b + error

"""model training"""
params = {
    "x": x,
    "y": y,
    "epoch_num": 100,
    "model": LinearBridge,
    "batch_size": 128,
    "alpha": 0.01,
    "weight_decay": 0.05,
}
process_bar = sqdm()
model = train(**params)
print(f"w before update is {true_w}, w after update is {model.w}")
print(f"b before update is {true_b}, b after update is {model.b}")
def train_rnn_pytorch(epoch_num, batch_num, model, loss, optimizer, data_iter,
                      corpus_index, num_step, batch_size, char_to_idx,
                      vocab_set, vocab_size, prefixs, pred_num,
                      predict_rnn_pytorch, clipping_theta=1e-2,
                      random_sample=True, device="cuda"):
    """
    function: training and predicting with an rnn (pytorch version)
    params epoch_num: the number of epochs
    params batch_num: the number of batches in an epoch
    params model: the rnn model
    params loss: such as nn.CrossEntropyLoss()
    params optimizer: optimizer in pytorch
    params data_iter: data_iter_random/data_iter_consecutive
    params corpus_index: the index of the corpus
    params num_step: the number of time steps in the rnn
    params batch_size: the size of a batch
    params char_to_idx: mapping that converts a character to its index
    params vocab_set: the list of words in the corpus
    params vocab_size: the length of vocab_set
    params prefixs: the list of prediction prefixes, such as ["分开", "不分开"]
    params pred_num: the number of characters to predict
    params predict_rnn_pytorch: the prediction function
    params clipping_theta: the max value of the norm of the grad
    params random_sample: if True, sample with data_iter_random;
                          otherwise, use data_iter_consecutive
    params device: "cpu"/"cuda"
    """
    # training bar
    process_bar = sqdm()
    for epoch in range(epoch_num):
        # reset the running loss per epoch (used to calculate perplexity)
        l_sum, n_class = 0, 0
        # with consecutive sampling, carry the hidden state across batches
        if not random_sample:
            h_state = None
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        for x, y in data_iter(corpus_index, batch_size, num_step, device):
            x = to_onehot(x, vocab_size, device)
            # with random sampling, re-init h_state for each batch
            if random_sample:
                h_state = None
            else:
                # detach h_state from the computation graph for consecutive sampling
                if h_state is not None:
                    if isinstance(h_state, tuple):
                        # lstm, state: (h, c)
                        h_state = (h_state[0].detach(), h_state[1].detach())
                    else:
                        h_state.detach_()
            # rnn forward, outputs shape: (num_step, batch_size, vocab_size)
            outputs, h_state = model(x, h_state)
            # reshape outputs and y to calculate the loss
            outputs = outputs.view(-1, outputs.shape[-1])
            y = y.transpose(0, 1).contiguous().view(-1)
            # calculate loss, y --> long type
            l = loss(outputs, y.long())
            # clear grad
            optimizer.zero_grad()
            # grad backward
            l.backward()
            # grad clipping
            grad_clipping(model.parameters(), clipping_theta, device)
            # update params
            optimizer.step()
            # running loss
            l_sum += l.item() * y.shape[0]
            n_class += y.shape[0]
            # calculate perplexity
            try:
                perplexity = np.exp(l_sum / n_class)
            except OverflowError:
                perplexity = float("inf")
            # training bar
            process_bar.show_process(batch_num, 1, train_loss=perplexity)
        # predict
        print("\n")
        for prefix in prefixs:
            print(f"prefix-{prefix}: ",
                  predict_rnn_pytorch(prefix, pred_num, model, char_to_idx,
                                      vocab_set, vocab_size, device))
        print("\n")
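# A sketch of wiring up `train_rnn_pytorch`, assuming d2l-style helpers
# (data_iter_consecutive, predict_rnn_pytorch, an RNNModel wrapper around
# nn.RNN) and a corpus already converted to indices; every name and value
# below is an assumption about the surrounding code, not part of this module.
#
#   rnn_layer = nn.RNN(input_size=vocab_size, hidden_size=256)
#   model = RNNModel(rnn_layer, vocab_size).to("cuda")
#   optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)
#   train_rnn_pytorch(epoch_num=250,
#                     batch_num=len(corpus_index) // (32 * 35),
#                     model=model, loss=nn.CrossEntropyLoss(),
#                     optimizer=optimizer, data_iter=data_iter_consecutive,
#                     corpus_index=corpus_index, num_step=35, batch_size=32,
#                     char_to_idx=char_to_idx, vocab_set=vocab_set,
#                     vocab_size=vocab_size, prefixs=["分开", "不分开"],
#                     pred_num=50, predict_rnn_pytorch=predict_rnn_pytorch,
#                     random_sample=False, device="cuda")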
def train_rnn(epoch_num, batch_num, rnn, loss, init_hidden_state, get_params,
              data_iter, corpus_index, num_step, hidden_num, lr, batch_size,
              char_to_idx, vocab_set, vocab_size, prefixs, predict_rnn,
              pred_num, clipping_theta=1e-2, random_sample=True,
              device="cuda"):
    """
    function: training and predicting with an rnn (from-scratch version)
    params epoch_num: the number of epochs
    params batch_num: the number of batches in an epoch
    params rnn: the rnn model
    params loss: such as nn.CrossEntropyLoss()
    params init_hidden_state: define the state of the hidden layer
    params get_params: get the weights and biases of the rnn
    params data_iter: data_iter_random/data_iter_consecutive
    params corpus_index: the index of the corpus
    params num_step: the number of time steps in the rnn
    params hidden_num: the number of units in the hidden layer of the rnn
    params lr: the learning rate
    params batch_size: the size of a batch
    params char_to_idx: mapping that converts a character to its index
    params vocab_set: the list of words in the corpus
    params vocab_size: the length of vocab_set
    params prefixs: the list of prediction prefixes, such as ["分开", "不分开"]
    params predict_rnn: the prediction function
    params pred_num: the number of characters to predict
    params clipping_theta: the max value of the norm of the grad
    params random_sample: if True, sample with data_iter_random;
                          otherwise, use data_iter_consecutive
    params device: "cpu"/"cuda"
    """
    # training bar
    process_bar = sqdm()
    # get the params of the rnn
    params = get_params(vocab_size, hidden_num, vocab_size, device)
    for epoch in range(epoch_num):
        # reset the running loss per epoch (used to calculate perplexity)
        l_sum, n_class = 0, 0
        # with consecutive sampling, carry the hidden state across batches
        if not random_sample:
            h_state = init_hidden_state(batch_size, hidden_num, device)
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        for x, y in data_iter(corpus_index, batch_size, num_step, device):
            # x shape: (num_step, batch_size, vocab_size)
            x = to_onehot(x, vocab_size, device)
            # with random sampling, re-init h_state for each batch
            if random_sample:
                h_state = init_hidden_state(x.shape[1], hidden_num, device)
            else:
                # detach h_state from the computation graph for consecutive sampling
                if h_state is not None:
                    if isinstance(h_state, tuple):
                        # lstm, state: (h, c)
                        h_state = (h_state[0].detach(), h_state[1].detach())
                    else:
                        h_state.detach_()
            # rnn forward, outputs shape: (num_step, batch_size, vocab_size)
            outputs, h_state = rnn(x, h_state, params)
            # reshape outputs and y to calculate the loss
            outputs = outputs.view(-1, outputs.shape[-1])
            y = y.transpose(0, 1).contiguous().view(-1)
            # calculate loss, y --> long type
            l = loss(outputs, y.long())
            # clear grad (it is still None the first time through)
            if params[0].grad is not None:
                for param in params:
                    param.grad.data.zero_()
            # backward
            l.backward()
            # grad clipping
            grad_clipping(params, clipping_theta, device)
            # sgd update
            sgd(params, lr)
            # running loss
            l_sum += l.item() * y.shape[0]
            n_class += y.shape[0]
            # perplexity
            try:
                perplexity = np.exp(l_sum / n_class)
            except OverflowError:
                perplexity = float("inf")
            # training bar
            process_bar.show_process(batch_num, 1, train_loss=perplexity)
        # predict
        print("\n")
        for prefix in prefixs:
            print(f"prefix-{prefix}: ",
                  predict_rnn(prefix, pred_num, rnn, init_hidden_state,
                              hidden_num, params, char_to_idx, vocab_set,
                              vocab_size, device))
        print("\n")
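# The from-scratch variant is called the same way, except the model is the
# triple (rnn, get_params, init_hidden_state) and updates go through the
# hand-rolled `sgd`; as above, the helper names and values are assumptions.
#
#   train_rnn(epoch_num=250, batch_num=len(corpus_index) // (32 * 35),
#             rnn=rnn, loss=nn.CrossEntropyLoss(),
#             init_hidden_state=init_hidden_state, get_params=get_params,
#             data_iter=data_iter_random, corpus_index=corpus_index,
#             num_step=35, hidden_num=256, lr=1e2, batch_size=32,
#             char_to_idx=char_to_idx, vocab_set=vocab_set,
#             vocab_size=vocab_size, prefixs=["分开", "不分开"],
#             predict_rnn=predict_rnn, pred_num=50, random_sample=True,
#             device="cuda")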
def train_epoch(data_num, epoch_num, model, loss, train_iter, batch_size,
                optimizer=None, test_iter=None, evaluate=None, draw=False,
                draw_mean=False, save_fig=False, save_path="./img/",
                accum_step=1, gpu=False):
    """
    function: training in pytorch (testing only once per epoch), which speeds
              up training within an epoch
    params data_num: the number of samples in the train set
    params epoch_num: the number of epochs
    params model: model which fits the data
    params loss: loss function
    params train_iter: train data loader
    params batch_size: the size of a batch
    params optimizer: torch optimizer used to update the params
                      (required here, despite the None default)
    params test_iter: test data loader, used in testing
    params evaluate: criterion such as acc/f1 score
    params draw: whether to draw a figure with loss and score in train/test
    params draw_mean: if True, record the epoch-mean values; otherwise record
                      the last-iteration values of each epoch
    params save_fig: whether to save the figure
    params save_path: the path for saving the figure
    params accum_step: used in gradient accumulation; the params are updated
                       every accum_step iterations
    params gpu: if True and cuda is available, tensors are sent to the gpu
    """
    # training bar
    process_bar = sqdm()
    # init
    test_loss = test_score = "-"
    # store loss and score in train and test
    train_loss_list, test_loss_list = [], []
    train_score_list, test_score_list = [], []
    # iteration num in each epoch
    iter_num = np.ceil(data_num / batch_size)
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        count, mean_train_loss, mean_train_score = 1., 0., 0.
        test_num, mean_test_loss, mean_test_score = 0., 0., 0.
        for x, y in train_iter:
            # cuda available and gpu requested
            if torch.cuda.is_available() and gpu:
                # send tensors from cpu to gpu
                x = x.cuda()
                y = y.cuda()
            # train
            model.train()
            train_pred = model(x)
            train_loss = loss(train_pred, y) / accum_step
            # calculate mean train loss
            mean_train_loss = ((
                (count - 1) * mean_train_loss +
                accum_step * train_loss) / count).item()
            # when draw_mean is True, record the epoch-mean train loss;
            # otherwise, record the train loss of the last iteration
            if count == iter_num:
                if draw_mean:
                    train_loss_list.append(mean_train_loss)
                else:
                    # .item() detaches the value from the graph
                    train_loss_list.append((accum_step * train_loss).item())
            # if a criterion (evaluate) is given, like accuracy/f1_score,
            # use it to calculate train_score
            model.eval()
            if evaluate is not None:
                train_score = evaluate(x, y)
                mean_train_score = (
                    (count - 1) * mean_train_score + train_score) / count
                # same recording rule as for the train loss
                if count == iter_num:
                    if draw_mean:
                        train_score_list.append(mean_train_score)
                    else:
                        train_score_list.append(train_score)
            model.train()
            # bp
            train_loss.backward()
            if (count % accum_step) == 0:
                # grad update
                optimizer.step()
                # clear grad
                optimizer.zero_grad()
            if count == iter_num:
                # drop any leftover accumulated grad at the end of the epoch
                optimizer.zero_grad()
            # test loss
            with torch.no_grad():
                if (test_iter is not None) and (count == iter_num):
                    # eval mode: disables dropout and freezes batch normalization
                    model.eval()
                    for test_data, test_label in test_iter:
                        # accumulate the size of the test set
                        test_num += len(test_label)
                        # cuda available and gpu requested
                        if torch.cuda.is_available() and gpu:
                            test_data = test_data.cuda()
                            test_label = test_label.cuda()
                        test_pred = model(test_data)
                        test_loss = loss(test_pred, test_label).item()
                        mean_test_loss += len(test_label) * test_loss
                        # use the criterion to calculate test_score
                        if evaluate is not None:
                            test_score = evaluate(test_data, test_label)
                            mean_test_score += len(test_label) * test_score
                    # result
                    mean_test_loss /= test_num
                    test_loss_list.append(mean_test_loss)
                    if evaluate is not None:
                        mean_test_score /= test_num
                        test_score_list.append(mean_test_score)
                    # back to train mode
                    model.train()
            # update counter
            count += 1
            # training bar
            if evaluate is None:
                mean_train_score = "-"
                mean_test_score = "-"
            if test_iter is None:
                mean_test_loss = "-"
                mean_test_score = "-"
            # show the running mean loss and score of the epoch
            process_bar.show_process(data_num,
                                     batch_size=batch_size,
                                     train_loss=mean_train_loss,
                                     train_score=mean_train_score,
                                     test_loss=mean_test_loss,
                                     test_score=mean_test_score)
        print("\n")
    # draw the loss figure and score figure
    # note: with draw_mean=False, the train curves use the last-iteration
    # values, not the epoch means
    if draw:
        # set figure format
        set_fig_display(axes_spines_state=[True] * 4)
        # add new figure and subplot
        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        ax1.plot(range(len(train_loss_list)),
                 train_loss_list,
                 label="train_loss")
        if len(test_loss_list) > 0:
            ax1.plot(range(len(train_loss_list)),
                     test_loss_list,
                     label="test_loss")
        ax1.set_xlabel("epoch num")
        ax1.set_ylabel("loss")
        # ax2
        if evaluate is not None:
            ax2 = ax1.twinx()
            ax2.plot(range(len(train_score_list)),
                     train_score_list,
                     "c-",
                     label="train_score",
                     alpha=0.8)
            if len(test_score_list) > 0:
                ax2.plot(range(len(train_score_list)),
                         test_score_list,
                         "r-",
                         label="test_score",
                         alpha=0.8)
            ax2.set_ylabel("score")
            ax2.set_ylim([0, 1.1])
        if test_iter is None and evaluate is None:
            legend_labels = ["train_loss"]
            legend_loc = [0.75, 0.82]
        elif test_iter is None:
            legend_labels = ["train_loss", "train_score"]
            legend_loc = [0.58, 0.82]
        elif evaluate is None:
            legend_labels = ["train_loss", "test_loss"]
            legend_loc = [0.595, 0.82]
        else:
            ax2.set_ylim([0, 1.18])
            legend_labels = [
                "train_loss", "test_loss", "train_score", "test_score"
            ]
            legend_loc = [0.58, 0.77]
        fig.legend(labels=legend_labels, ncol=2, loc=legend_loc)
        if save_fig:
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            fig_name = "fig" + datetime.now().strftime(
                "%Y%m%d_%H%M%S") + ".jpg"
            plt.savefig(save_path + fig_name, dpi=200)
        plt.show()
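# A sketch of driving `train_epoch` with a toy classifier; the accuracy
# closure shown for `evaluate` is an assumption about what the hook should
# return (a scalar score for one batch).
#
#   net = nn.Sequential(nn.Linear(20, 64), nn.ReLU(), nn.Linear(64, 2))
#   opt = torch.optim.SGD(net.parameters(), lr=0.1)
#   xs, ys = torch.randn(1000, 20), torch.randint(0, 2, (1000,))
#   train_iter = torch.utils.data.DataLoader(
#       torch.utils.data.TensorDataset(xs, ys), batch_size=50, shuffle=True)
#
#   def accuracy(x, y):
#       with torch.no_grad():
#           return (net(x).argmax(dim=1) == y).float().mean().item()
#
#   train_epoch(data_num=1000, epoch_num=5, model=net,
#               loss=nn.CrossEntropyLoss(), train_iter=train_iter,
#               batch_size=50, optimizer=opt, evaluate=accuracy,
#               accum_step=2, draw=True, draw_mean=True)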
def train_experiment(data_num, epoch_num, model, loss, train_iter, batch_size,
                     lr=0.01, weight_decay=0, params=None, optimizer=None,
                     test_iter=None, evaluate=None, draw=False,
                     draw_epoch=False, save_fig=False, save_path="./img/",
                     gpu=False):
    """
    function: training in pytorch (experiments with self-defined models and nn.Module)
    params data_num: the number of samples in the train set
    params epoch_num: the number of epochs
    params model: model which fits the data
    params loss: loss function
    params train_iter: train data loader
    params batch_size: the size of a batch
    params lr: learning rate
    params weight_decay: the weight of weight_decay/L2 regularization
    params params: the parameters whose grads need to be updated
    params optimizer: torch optimizer used to update the params
    params test_iter: test data loader, used in testing
    params evaluate: criterion such as acc/f1 score
    params draw: whether to draw a figure with loss and score in train/test
    params draw_epoch: if True, draw per-epoch data; otherwise per-iteration
    params save_fig: whether to save the figure
    params save_path: the path for saving the figure
    params gpu: if True and cuda is available, tensors are sent to the gpu
    """
    # training bar
    process_bar = sqdm()
    # test data which is used to test after training
    if test_iter is not None:
        test_data, test_label = next(iter(test_iter))
        # cuda available and gpu requested
        if torch.cuda.is_available() and gpu:
            test_data = test_data.cuda()
            test_label = test_label.cuda()
    # store loss and score in train and test
    train_loss_list, test_loss_list = [], []
    train_score_list, test_score_list = [], []
    # iteration num in each epoch
    iter_num = np.ceil(data_num / batch_size)
    for epoch in range(epoch_num):
        print(f"Epoch [{epoch + 1}/{epoch_num}]")
        count, mean_train_loss, mean_train_score = 1., 0., 0.
        mean_test_loss, mean_test_score = 0., 0.
        for x, y in train_iter:
            # cuda available and gpu requested
            if torch.cuda.is_available() and gpu:
                # send tensors from cpu to gpu
                x = x.cuda()
                y = y.cuda()
            # train
            train_pred = model(x)
            train_loss = loss(train_pred, y)
            # calculate mean train loss
            mean_train_loss = (((count - 1) * mean_train_loss + train_loss) /
                               count).item()
            # when draw_epoch is True, record the epoch-mean train loss;
            # when False (default), record the loss of each iteration
            if not draw_epoch:
                train_loss_list.append(train_loss.item())
            else:
                if count == iter_num:
                    train_loss_list.append(mean_train_loss)
            # if a criterion (evaluate) is given, like accuracy/f1_score,
            # use it to calculate train_score
            if evaluate is not None:
                train_score = evaluate(x, y)
                mean_train_score = (
                    (count - 1) * mean_train_score + train_score) / count
                # same recording rule as for the train loss
                if not draw_epoch:
                    train_score_list.append(train_score)
                else:
                    if count == iter_num:
                        train_score_list.append(mean_train_score)
            # clear grad
            if optimizer is not None:
                # use the torch optimizer
                optimizer.zero_grad()
            elif (params is not None) and (params[0].grad is not None):
                # the grad is still None the first time self-defined params are used
                for param in params:
                    param.grad.data.zero_()
            # bp
            train_loss.backward()
            # grad update
            if optimizer is not None:
                optimizer.step()
            else:
                sgd(params, lr, weight_decay)
            # test loss
            if test_iter is not None:
                if isinstance(model, nn.Module):
                    # eval mode: disables dropout and freezes batch normalization
                    model.eval()
                    test_pred = model(test_data)
                    test_loss = loss(test_pred, test_label).item()
                    mean_test_loss = (
                        (count - 1) * mean_test_loss + test_loss) / count
                    # use the criterion to calculate test_score
                    if evaluate is not None:
                        test_score = evaluate(test_data, test_label)
                        mean_test_score = (
                            (count - 1) * mean_test_score + test_score) / count
                        if not draw_epoch:
                            test_score_list.append(test_score)
                        else:
                            if count == iter_num:
                                test_score_list.append(mean_test_score)
                    model.train()
                else:
                    # self-defined model that has the parameter "is_training"
                    if "is_training" in model.__code__.co_varnames:
                        test_pred = model(test_data, is_training=False)
                        test_loss = loss(test_pred, test_label).item()
                        mean_test_loss = (
                            (count - 1) * mean_test_loss + test_loss) / count
                        # use the criterion to calculate test_score
                        if evaluate is not None:
                            test_score = evaluate(test_data,
                                                  test_label,
                                                  is_training=False)
                            mean_test_score = ((count - 1) * mean_test_score +
                                               test_score) / count
                            if not draw_epoch:
                                test_score_list.append(test_score)
                            else:
                                if count == iter_num:
                                    test_score_list.append(mean_test_score)
                    else:
                        test_pred = model(test_data)
                        test_loss = loss(test_pred, test_label).item()
                        mean_test_loss = (
                            (count - 1) * mean_test_loss + test_loss) / count
                        # use the criterion to calculate test_score
                        if evaluate is not None:
                            test_score = evaluate(test_data, test_label)
                            mean_test_score = ((count - 1) * mean_test_score +
                                               test_score) / count
                            if not draw_epoch:
                                test_score_list.append(test_score)
                            else:
                                if count == iter_num:
                                    test_score_list.append(mean_test_score)
                # same recording rule as for the train loss
                if not draw_epoch:
                    test_loss_list.append(test_loss)
                else:
                    if count == iter_num:
                        test_loss_list.append(mean_test_loss)
            # update counter
            count += 1
            # training bar
            if evaluate is None:
                mean_train_score = "-"
                mean_test_score = "-"
            if test_iter is None:
                mean_test_loss = "-"
                mean_test_score = "-"
            # show the running mean loss and score of the epoch
            process_bar.show_process(data_num,
                                     batch_size=batch_size,
                                     train_loss=mean_train_loss,
                                     train_score=mean_train_score,
                                     test_loss=mean_test_loss,
                                     test_score=mean_test_score)
        print("\n")
    # draw the loss figure and score figure
    # note: with draw_epoch=False, the train curve uses the per-iteration
    # loss, not the epoch mean
    if draw:
        # set figure format
        set_fig_display(axes_spines_state=[True] * 4)
        # add new figure and subplot
        fig = plt.figure()
        ax1 = fig.add_subplot(111)
        ax1.plot(range(len(train_loss_list)),
                 train_loss_list,
                 label="train_loss")
        if len(test_loss_list) > 0:
            ax1.plot(range(len(train_loss_list)),
                     test_loss_list,
                     label="test_loss")
        if draw_epoch:
            ax1.set_xlabel("epoch num")
        else:
            ax1.set_xlabel("iteration num")
        ax1.set_ylabel("loss")
        # ax2
        if evaluate is not None:
            ax2 = ax1.twinx()
            ax2.plot(range(len(train_score_list)),
                     train_score_list,
                     "c-",
                     label="train_score",
                     alpha=0.8)
            if len(test_score_list) > 0:
                ax2.plot(range(len(train_score_list)),
                         test_score_list,
                         "r-",
                         label="test_score",
                         alpha=0.8)
            ax2.set_ylabel("score")
            ax2.set_ylim([0, 1.1])
        if test_iter is None and evaluate is None:
            legend_labels = ["train_loss"]
            legend_loc = [0.75, 0.82]
        elif test_iter is None:
            legend_labels = ["train_loss", "train_score"]
            legend_loc = [0.58, 0.82]
        elif evaluate is None:
            legend_labels = ["train_loss", "test_loss"]
            legend_loc = [0.595, 0.82]
        else:
            ax2.set_ylim([0, 1.18])
            legend_labels = [
                "train_loss", "test_loss", "train_score", "test_score"
            ]
            legend_loc = [0.58, 0.77]
        fig.legend(labels=legend_labels, ncol=2, loc=legend_loc)
        if save_fig:
            if not os.path.exists(save_path):
                os.mkdir(save_path)
            fig_name = "fig" + datetime.now().strftime(
                "%Y%m%d_%H%M%S") + ".jpg"
            plt.savefig(save_path + fig_name, dpi=200)
        plt.show()