# In[ ]:


# Binarise the predicted probabilities in place, one row at a time:
# every label whose probability is >= th becomes 1 and the rest become 0.
# If no probability in a row reaches th, only the label with the largest
# probability is set to 1.
for i in range(pred.shape[0]):
    row = pred[i]
    reached = row >= th
    if reached.any():
        row[reached] = 1
        # Re-evaluated after the assignment above, as in the original order.
        row[row < th] = 0
    else:
        max_index = np.argmax(row)
        row[:] = 0
        row[max_index] = 1


# In[ ]:


# Score the binarised validation predictions.
acc_val = hamming_score(y_val, pred)
p_val, r_val, f1_val = f1(y_val, pred)


# In[ ]:


# Same binarisation on a copy of the test predictions, then score them.
pred = deepcopy(pred_test)
for i in range(pred.shape[0]):
    row = pred[i]
    reached = row >= th
    if reached.any():
        row[reached] = 1
        row[row < th] = 0
    else:
        max_index = np.argmax(row)
        row[:] = 0
        row[max_index] = 1

acc_test = hamming_score(y_test, pred)
p_test, r_test, f1_test = f1(y_test, pred)
def _read_model_hparams(args):
    """Collect the architecture settings that training and testing both read from ``args``."""
    return {
        'hidden_size': args.lstm_hidden,
        'num_layers': args.lstm_layers,
        'bi_lstm': args.bi,
        'n_filters': args.filters,
        'filter_sizes': args.filter_sizes,
        'c_dropout': args.cd,
        'l_dropout': args.ld,
        'batch_size': args.batch_size,
        'gru': args.gru,
        'highway': args.highway,
        'kmax': args.k,
    }


def _build_model(model_cls, hparams, output_size, weights_matrix,
                 teacher_forcing_ratio, device, announce):
    """Instantiate ``model_cls``, move it to ``device`` and wrap it in DataParallel on multi-GPU hosts."""
    import torch
    import torch.nn as nn

    model = model_cls(hparams['hidden_size'], output_size, hparams['bi_lstm'],
                      weights_matrix, hparams['num_layers'], hparams['n_filters'],
                      hparams['filter_sizes'], hparams['c_dropout'],
                      hparams['l_dropout'], teacher_forcing_ratio,
                      hparams['gru'], hparams['highway'], hparams['kmax'])
    model = model.to(device)
    if torch.cuda.device_count() > 1:
        if announce:
            print("Let's use", torch.cuda.device_count(), "GPUs!")
        # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
        model = nn.DataParallel(model)
    return model


def _evaluate(model, loader, criterion, device, collect=False):
    """Run one gradient-free pass over ``loader`` and return ``(summed_loss, references, predicts)``.

    ``summed_loss`` is the sum of the per-batch criterion values (not the
    average).  When ``collect`` is true, the targets and the model outputs
    of every batch are passed through ``flattern_result`` and concatenated
    along axis 0; otherwise ``references`` and ``predicts`` are ``None``.
    The caller is expected to have put the model in eval mode.
    """
    import torch

    total = 0.0
    reference_parts = []
    predict_parts = []
    # No gradients are needed for scoring; skipping the autograd graph saves memory.
    with torch.no_grad():
        for src_seqs, trg_seqs in loader:
            src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)
            outputs = model(src_seqs, trg_seqs).to(device)
            total += criterion(outputs, trg_seqs).item()
            if collect:
                reference_parts.append(flattern_result(trg_seqs.cpu().numpy()))
                predict_parts.append(flattern_result(outputs.detach().cpu().numpy()))
    if not collect or not reference_parts:
        return total, None, None
    # One concatenation at the end instead of np.append per batch.
    return (total,
            np.concatenate(reference_parts, axis=0),
            np.concatenate(predict_parts, axis=0))


def _format_result_rows(references, predictions, ms_tags):
    """Build the text of the per-row result file (header plus one line per test row).

    NOTE(review): the original writer referenced ``X_test``, ``i``, ``j``,
    ``r`` and ``p``, none of which exist inside ``run``.  ``run`` does not
    receive the raw dialogue text, so ``dialogue_id``, ``dialogue_length``,
    ``utterance_length`` and ``utterance`` are written empty, and
    ``utterance_id`` is the row index in the flattened test set -- confirm
    this is acceptable for downstream consumers.
    """
    lines = ['dialogue_id, utterance_id, dialogue_length, utterance_length, utterance, references, predictions, hamming_score, p, r, f1\n']
    for row_id, (ref_vec, pred_vec) in enumerate(zip(references, predictions)):
        # Score a single row using the same 2-D (rows, labels) layout as the
        # aggregate metrics.
        ref_2d = np.array([ref_vec])
        pred_2d = np.array([pred_vec])
        prf = f1(y_true=ref_2d, y_pred=pred_2d)
        fields = [
            '',
            str(row_id),
            '',
            '',
            '""',
            vector2tags(ref_vec, ms_tags),
            vector2tags(pred_vec, ms_tags),
            str(hamming_score(y_true=ref_2d, y_pred=pred_2d)),
            str(prf[0]),
            str(prf[1]),
            str(prf[2]),
        ]
        lines.append(','.join(fields) + '\n')
    return ''.join(lines)


def run(args, weights_matrix, output_size, train_data, train_target, val_data, val_target, test_data, test_target, tuning, ms_tags):
    """Train and/or evaluate a DAMIC model, driven by ``tuning`` and ``sys.argv``.

    Phases:
      * training runs when ``tuning`` is truthy or ``sys.argv[1] == 'train'``;
        a checkpoint is written to a fresh ``./model/<random>/`` directory
        each time the summed validation loss improves, and training stops
        early after ``args.patient`` epochs without improvement.
      * testing runs when ``tuning`` is truthy or ``sys.argv[1] == 'test'``
        with at least one further CLI argument; every checkpoint in the model
        directory whose name has no '.' (restricted to ``epoch`` when
        ``epoch > 0``) is scored on the train and validation sets, decision
        thresholds are searched on the best validation checkpoint, and that
        checkpoint is scored on the test set.

    Args:
        args: parsed options; the attributes read are ``baseline``, ``wd``,
            ``stacked``, ``lstm_hidden``, ``lstm_layers``, ``bi``,
            ``filters``, ``filter_sizes``, ``cd``, ``ld``, ``batch_size``,
            ``gru``, ``highway``, ``k``, optional ``tf``, plus ``epoch``,
            ``patient``, ``lr`` for training and ``models``, ``epoch``,
            ``output_result``, ``output_loss``, ``discount`` for testing.
        weights_matrix: embedding weights, converted with ``torch.Tensor``.
        output_size: forwarded to the model constructor.
        train_data, train_target, val_data, val_target, test_data,
            test_target: passed unchanged to ``batch_maker``.
        tuning: truthy during hyper-parameter search; runs both phases and
            silences most progress output.
        ms_tags: label names, indexed per output column when reporting.

    Returns:
        ``{'loss': -acc, 'status': STATUS_OK}`` when the test phase ran,
        where ``acc`` is the test hamming score; ``None`` otherwise.

    Raises:
        FileNotFoundError: if the test phase finds no usable checkpoint.
    """
    import torch
    import torch.nn as nn
    from torch import optim

    if args.baseline:
        from base import baseline as DAMIC
    elif args.wd:
        assert hasattr(args, 'tf')
        from DAMIC_wd import DAMIC
    elif args.stacked:
        from DAMIC_stacked import DAMIC
    else:
        from DAMIC import DAMIC

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    weights_matrix = torch.Tensor(weights_matrix)

    # Guard against a missing CLI argument instead of raising IndexError.
    mode = sys.argv[1] if len(sys.argv) > 1 else ''
    do_train = bool(tuning) or mode == 'train'
    do_test = bool(tuning) or (mode == 'test' and len(sys.argv) > 2)
    if not (do_train or do_test):
        return None

    hparams = _read_model_hparams(args)
    batch_size = hparams['batch_size']
    has_tf = hasattr(args, 'tf') and args.tf is not None

    if do_train:
        n_epochs = args.epoch
        patient = args.patient
        learning_rate = args.lr
        criterion = nn.BCELoss()
        teacher_forcing_ratio = args.tf if has_tf else None
        save_path = './model/' + randomword(10) + '/'

        if not tuning:
            print()
            print('Parameters')
            print('lstm_hidden_size', hparams['hidden_size'])
            print('lstm_layers', hparams['num_layers'])
            print('epochs', n_epochs)
            print('patient', patient)
            print('learning_rate', learning_rate)
            print('bi_lstm', hparams['bi_lstm'])
            print('n_filters', hparams['n_filters'])
            print('filter_sizes', hparams['filter_sizes'])
            print('batch_size', batch_size)
            print('CNN dropout', hparams['c_dropout'])
            print('LSTM dropout', hparams['l_dropout'])
            print('Teacher Forcing rate', teacher_forcing_ratio)
            print('GRU', hparams['gru'])
            print('RNN Highway', hparams['highway'])
            print('k max pooling', hparams['kmax'])
            print()
            print('model will be saved to', save_path)

        os.makedirs(save_path, exist_ok=True)

        model = _build_model(DAMIC, hparams, output_size, weights_matrix,
                             teacher_forcing_ratio, device, announce=not tuning)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        losses = np.zeros(n_epochs)
        vlosses = np.zeros(n_epochs)
        best_epoch = 0
        stop_counter = 0
        best_score = None

        train_loader_dataset = batch_maker(train_data, train_target, batch_size)
        val_loader_dataset = batch_maker(val_data, val_target, batch_size)

        for epoch in range(n_epochs):
            # ---- train ----
            model.train()
            for src_seqs, trg_seqs in train_loader_dataset:
                src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)
                outputs = model(src_seqs, trg_seqs).to(device)
                optimizer.zero_grad()
                loss = criterion(outputs, trg_seqs)
                loss.backward()
                optimizer.step()
                losses[epoch] += loss.item()
            if not tuning:
                print('epoch', epoch+1, ' average train loss: ', losses[epoch] / len(train_loader_dataset))

            # ---- validate ----
            model.eval()
            vlosses[epoch], _, _ = _evaluate(model, val_loader_dataset, criterion, device)
            if not tuning:
                print('epoch', epoch+1, ' average val loss: ', vlosses[epoch] / len(val_loader_dataset))

            # Checkpoint on improvement of the summed validation loss.
            if best_score is None or vlosses[epoch] < best_score:
                best_score = vlosses[epoch]
                best_epoch = epoch+1
                torch.save(model.state_dict(), os.path.join(save_path, str(best_epoch)))
                stop_counter = 0
                if not tuning:
                    print('epoch', best_epoch, 'model updated')
            else:
                stop_counter += 1
            if stop_counter >= patient:
                print("Early stopping")
                break

        if not tuning:
            print('Models saved to', save_path)
            print('Best epoch', str(best_epoch), ', with score', str(best_score / len(val_loader_dataset)))

    if not do_test:
        # Train-only invocation: there is no test accuracy to report.
        return None

    criterion = nn.BCELoss()
    # Teacher forcing is switched off at test time whenever the model uses it.
    teacher_forcing_ratio = .0 if has_tf else None
    if tuning:
        directory = save_path
        epoch = best_epoch
        result_file = ''
        loss_file = ''
        test_discount = 1.0
    else:
        directory = args.models[0]
        epoch = args.epoch
        result_file = args.output_result[0]
        loss_file = args.output_loss
        test_discount = args.discount

        print('lstm_hidden_size', hparams['hidden_size'])
        print('lstm_layers', hparams['num_layers'])
        print('bi_lstm', hparams['bi_lstm'])
        print('n_filters', hparams['n_filters'])
        print('filter_sizes', hparams['filter_sizes'])
        print('batch_size', batch_size)
        print('CNN dropout', hparams['c_dropout'])
        print('LSTM dropout', hparams['l_dropout'])
        print('test discount', test_discount)
        print('Teacher Forcing rate', teacher_forcing_ratio)
        print('GRU', hparams['gru'])
        print('RNN Highway', hparams['highway'])
        print('k max pooling', hparams['kmax'])

    model = _build_model(DAMIC, hparams, output_size, weights_matrix,
                         teacher_forcing_ratio, device, announce=not tuning)

    # The loaders take the same arguments for every checkpoint, so build them once.
    train_loader_dataset = batch_maker(train_data, train_target, batch_size)
    val_loader_dataset = batch_maker(val_data, val_target, batch_size)
    test_loader_dataset = batch_maker(test_data, test_target, batch_size)

    loss_rows = []
    bloss = float('inf')
    breferences = []
    bpredicts = []
    bfile = ''

    for filename in os.listdir(directory):
        if '.' in filename:
            continue
        if epoch > 0 and filename != str(epoch):
            # Skipped checkpoints must not leak their name into the loss log.
            continue

        model.load_state_dict(torch.load(os.path.join(directory, filename)))
        model.eval()

        # Train loss is only reported (for plotting), never used for selection.
        loss, _, _ = _evaluate(model, train_loader_dataset, criterion, device)
        tloss = loss / len(train_loader_dataset)
        if not tuning:
            print('Epoch', filename, 'average train loss: ', tloss)

        loss, references, predicts = _evaluate(model, val_loader_dataset, criterion, device, collect=True)
        vloss = loss / len(val_loader_dataset)
        if loss_file:
            loss_rows.append(filename + ',' + str(tloss) + ',' + str(vloss) + '\n')
        if not tuning:
            print('Epoch', filename, 'average val loss: ', vloss)

        if vloss < bloss:
            bloss = vloss
            breferences = np.array(references)
            bpredicts = np.array(predicts)
            bfile = filename
        torch.cuda.empty_cache()

    if not bfile:
        raise FileNotFoundError('no usable checkpoint found in ' + str(directory))

    best_score, thresholds = best_score_search(breferences, bpredicts, hamming_score)
    if not tuning:
        print('best validation epoch:', bfile, 'with score:', str(best_score))

    # Reload the best checkpoint and score it on the test set.
    model.load_state_dict(torch.load(os.path.join(directory, bfile)))
    model.eval()
    loss, references, predicts = _evaluate(model, test_loader_dataset, criterion, device, collect=True)
    tloss = loss / len(test_loader_dataset)
    if not tuning:
        print('average test loss: ', tloss)
    torch.cuda.empty_cache()

    # Apply the thresholds found on the validation set to each test row.
    predictions = np.array([ret_predict(row, thresholds) for row in predicts])
    references = np.array(references)

    acc = hamming_score(y_true=references, y_pred=predictions)
    f1_scores = f1(y_true=references, y_pred=predictions)
    scores = str(acc) + ',' + ','.join([str(x) for x in f1_scores])
    print('Test Accuracy, Precision, Recall and F1 score: ', scores)

    if not tuning:
        print('Tag', ':', 'Accuracy, (Precision, Recall, F1)')
        for i in range(predictions.shape[1]):
            # Single-column slices keep the 2-D layout the metric helpers are given elsewhere.
            predictions_t = predictions[:, i:i + 1]
            references_t = references[:, i:i + 1]
            print(ms_tags[i], ':', hamming_score(y_true=references_t, y_pred=predictions_t), ',', f1(y_true=references_t, y_pred=predictions_t))

    # Write the optional reports through context managers so the handles are closed.
    if result_file:
        with open(result_file, 'w') as outf:
            outf.write(_format_result_rows(references, predictions, ms_tags))
    if loss_file:
        with open(loss_file, 'w') as lfile:
            lfile.write(''.join(loss_rows))

    return {'loss': -acc, 'status': STATUS_OK}