# In[ ]:

    # if a predicted proba >= th (0.5), that label is set to 1; if all probas are below th, the label with the largest proba is set to 1
    for i in range(pred.shape[0]):
        if len(np.where(pred[i] >= th)[0]) > 0:
            pred[i][pred[i] >= th] = 1
            pred[i][pred[i] < th] = 0
        else:
            max_index = np.argmax(pred[i])
            pred[i] = 0
            pred[i][max_index] = 1
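
    # Hedged sketch (not part of the original pipeline): the same thresholding
    # rule as a vectorized helper. `probs` is a hypothetical 2-D array of
    # per-label probabilities; `th` is the cut-off used above.
    def threshold_labels(probs, th=0.5):
        labels = (probs >= th).astype(float)                 # 1 where proba >= threshold
        empty = labels.sum(axis=1) == 0                      # rows where no proba reaches the threshold
        labels[empty, np.argmax(probs[empty], axis=1)] = 1   # fall back to the largest proba
        return labels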

    # In[ ]:

    acc_val = hamming_score(y_val, pred)
    p_val, r_val, f1_val = f1(y_val, pred)

    # In[ ]:
    pred = deepcopy(pred_test)

    for i in range(pred.shape[0]):
        if len(np.where(pred[i] >= th)[0]) > 0:
            pred[i][pred[i] >= th] = 1
            pred[i][pred[i] < th] = 0
        else:
            max_index = np.argmax(pred[i])
            pred[i] = 0
            pred[i][max_index] = 1
    acc_test = hamming_score(y_test, pred)
    p_test, r_test, f1_test = f1(y_test, pred)
Example #2
def run(args, weights_matrix, output_size, train_data, train_target, val_data, val_target, test_data, test_target, tuning, ms_tags):
    import torch
    import torch.nn as nn
    from torch.nn import functional as F
    from torch.autograd import Variable
    from torch import optim
    from torch.utils import data as data_utils

    if args.baseline:
        from base import baseline as DAMIC
    elif args.wd:
        assert hasattr(args, 'tf')
        from DAMIC_wd import DAMIC
    elif args.stacked:
        from DAMIC_stacked import DAMIC
    else:
        from DAMIC import DAMIC

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    weights_matrix = torch.Tensor(weights_matrix)

    if tuning or sys.argv[1] == 'train':

        # Global setup
        hidden_size = args.lstm_hidden
        num_layers = args.lstm_layers
        n_epochs = args.epoch
        criterion = nn.BCELoss()
        # criterion = nn.MultiLabelSoftMarginLoss()
        patient = args.patient
        learning_rate = args.lr
        bi_lstm = args.bi
        n_filters = args.filters
        filter_sizes = args.filter_sizes
        c_dropout = args.cd
        l_dropout = args.ld
        batch_size = args.batch_size
        gru = args.gru
        highway = args.highway
        kmax = args.k
        if hasattr(args, 'tf') and args.tf is not None:
            teacher_forcing_ratio = args.tf
        else:
            teacher_forcing_ratio = None

        save_path = './model/'+randomword(10)+'/'

        if not tuning: 
            print()
            print('Parameters')
            print('lstm_hidden_size', hidden_size)
            print('lstm_layers', num_layers)
            print('epochs', n_epochs)
            print('patient', patient)
            print('learning_rate', learning_rate)
            print('bi_lstm', bi_lstm)
            print('n_filters', n_filters)
            print('filter_sizes', filter_sizes)
            print('batch_size', batch_size)
            print('CNN dropout', c_dropout)
            print('LSTM dropout', l_dropout)
            print('Teacher Forcing rate', teacher_forcing_ratio)
            print('GRU', gru)
            print('RNN Highway', highway)
            print('k max pooling', kmax)
            print()
        print('model will be saved to', save_path)
        if not os.path.exists(save_path):
            os.makedirs(save_path)

        # torch.backends.cudnn.enabled = False
        model = DAMIC(hidden_size, output_size, bi_lstm, weights_matrix, num_layers, n_filters, filter_sizes, c_dropout, l_dropout, teacher_forcing_ratio, gru, highway, kmax)
        model = model.to(device)
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        if torch.cuda.device_count() > 1:
            if not tuning:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = nn.DataParallel(model)

        losses = np.zeros(n_epochs)
        vlosses = np.zeros(n_epochs)

        best_epoch = 0
        stop_counter = 0
        best_score = None

        train_loader_dataset = batch_maker(train_data, train_target, batch_size)
        val_loader_dataset = batch_maker(val_data, val_target, batch_size)
        # learning
        for epoch in range(n_epochs):
            ###################
            # train the model #
            ###################
            model.train() # prep model for training

            for data in train_loader_dataset:
                src_seqs, trg_seqs = data
                # inputs, targets = Variable(inputs.to(device)), Variable(targets.to(device))
                src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)

                outputs = model(src_seqs, trg_seqs)

                # print(outputs)
                outputs = outputs.to(device)

                optimizer.zero_grad()
                loss = criterion(outputs, trg_seqs)
                loss.backward()
                optimizer.step()
                # print(loss.item())
                losses[epoch] += loss.item()
            if not tuning:
                print('epoch', epoch+1, ' average train loss: ', losses[epoch] / len(train_loader_dataset))

            ######################    
            # validate the model #
            ######################
            model.eval() # prep model for evaluation

            for data in val_loader_dataset:
                src_seqs, trg_seqs = data
                src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)

                outputs = model(src_seqs, trg_seqs)

                # print(outputs)
                outputs = outputs.to(device)
                vlosses[epoch] += criterion(outputs, trg_seqs).item()
            if not tuning:
                print('epoch', epoch+1, ' average val loss: ', vlosses[epoch] / len(val_loader_dataset))

            if best_score is None or vlosses[epoch] < best_score:
                best_score = vlosses[epoch]
                best_epoch = epoch+1
                torch.save(model.state_dict(), save_path+str(best_epoch))
                stop_counter = 0
                if not tuning:
                    print('epoch', best_epoch, 'model updated')
            else:
                stop_counter += 1

            if stop_counter >= patient:
                print("Early stopping")
                break
        if not tuning:
            print('Models saved to', save_path)
            print('Best epoch', str(best_epoch), ', with score', str(best_score / len(val_loader_dataset)))


    if tuning or (sys.argv[1] == 'test' and len(sys.argv) > 2 and sys.argv[2] != ''):

        criterion = nn.BCELoss()
        test_discount = 1.0

        if tuning:
            directory = save_path
            epoch = best_epoch
            result_file = ''
            loss_file = ''
            if teacher_forcing_ratio is not None:
                teacher_forcing_ratio = 0.0
        else:
            directory = args.models[0]
            epoch = args.epoch
            result_file = args.output_result[0]
            loss_file = args.output_loss

            # Global setup
            hidden_size = args.lstm_hidden
            num_layers = args.lstm_layers
            bi_lstm = args.bi
            n_filters = args.filters
            filter_sizes = args.filter_sizes
            c_dropout = args.cd
            l_dropout = args.ld
            test_discount = args.discount
            batch_size = args.batch_size
            gru = args.gru
            highway = args.highway
            kmax = args.k

            if hasattr(args, 'tf') and args.tf is not None:
                teacher_forcing_ratio = 0.0
            else:
                teacher_forcing_ratio = None
            

        if not tuning:
            print('lstm_hidden_size', hidden_size)
            print('lstm_layers', num_layers)
            print('bi_lstm', bi_lstm)
            print('n_filters', n_filters)
            print('filter_sizes', filter_sizes)
            print('batch_size', batch_size)
            print('CNN dropout', c_dropout)
            print('LSTM dropout', l_dropout)
            print('test discount', test_discount)
            print('Teacher Forcing rate', teacher_forcing_ratio)
            print('GRU', gru)
            print('RNN Highway', highway)
            print('k max pooling', kmax)

        if result_file and result_file != '':
            outf = open(result_file, 'w')
            out = 'dialogue_id, utterance_id, dialogue_length, utterance_length, utterance, references, predictions, hamming_score, p, r, f1\n'
        if loss_file and loss_file != '':
            lfile = open(loss_file, 'w')
            lout = ''

        bloss = 9999999.99  # best (lowest) validation loss seen so far
        breferences = []
        bpredicts = []
        bfile = ''  # checkpoint filename with the best validation loss

        model = DAMIC(hidden_size, output_size, bi_lstm, weights_matrix, num_layers, n_filters, filter_sizes, c_dropout, l_dropout, teacher_forcing_ratio, gru, highway, kmax)
        model = model.to(device)

        if torch.cuda.device_count() > 1:
            if not tuning:
                print("Let's use", torch.cuda.device_count(), "GPUs!")
            # dim = 0 [30, xxx] -> [10, ...], [10, ...], [10, ...] on 3 GPUs
            model = nn.DataParallel(model)

        for filename in os.listdir(directory):
            if '.' in filename: continue
            # print('Epoch', filename)
            if loss_file and loss_file != '':
                lout = lout + filename
            if epoch > 0 and filename != str(epoch):
                # print('skipped')
                continue

            model.load_state_dict(torch.load(directory+filename))
            model.eval()

            train_loader_dataset = batch_maker(train_data, train_target, batch_size)
            val_loader_dataset = batch_maker(val_data, val_target, batch_size)
            test_loader_dataset = batch_maker(test_data, test_target, batch_size)

            loss = 0.0 # For plotting
            for data in train_loader_dataset:
                src_seqs, trg_seqs = data
                src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)

                outputs = model(src_seqs, trg_seqs)

                # print(outputs)
                outputs = outputs.to(device)
                loss += criterion(outputs, trg_seqs).item()

            tloss = loss / len(train_loader_dataset)
            if loss_file and loss_file != '':
                lout = lout + ',' + str(tloss)
            if not tuning:
                print('Epoch', filename, 'average train loss: ', tloss)

            
            loss = 0.0
            references = None
            predicts = None
            for data in val_loader_dataset:
                src_seqs, trg_seqs = data    
                src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)

                outputs = model(src_seqs, trg_seqs)

                # print(outputs)
                outputs = outputs.to(device)
                loss += criterion(outputs, trg_seqs).item()

                reference = flattern_result(trg_seqs.cpu().numpy())
                predict = flattern_result(outputs.detach().cpu().numpy())

                if references is None or predicts is None:
                    references = reference
                    predicts = predict
                else:
                    # print(predicts, predict)
                    references = np.append(references, reference, axis=0)
                    predicts = np.append(predicts, predict, axis=0)

            # print(references)

            vloss = loss / len(val_loader_dataset)
            if loss_file and loss_file != '':
                lout = lout + ',' + str(vloss) + '\n'
            if not tuning:
                print('Epoch', filename, 'average val loss: ', vloss)

            if vloss < bloss:
                bloss = vloss
                breferences = np.array(references)
                bpredicts = np.array(predicts)
                bfile = filename

            torch.cuda.empty_cache()

        best_score, thresholds = best_score_search(breferences, bpredicts, hamming_score)
        if not tuning:
            print('best validation epoch:', bfile, 'with score:', str(best_score))
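
        # Hedged sketch (illustrative only): best_score_search is not defined in
        # this snippet. One plausible implementation, consistent with how it is
        # called above (a metric over binarized predictions, returning the best
        # score plus one threshold per label), is a greedy per-label sweep; the
        # project's actual helper may differ.
        def _sketch_threshold_search(y_true, y_prob, metric, candidates=(0.2, 0.3, 0.4, 0.5, 0.6, 0.7)):
            thresholds = np.full(y_prob.shape[1], 0.5)
            for label in range(y_prob.shape[1]):
                def score(t, label=label):
                    trial = thresholds.copy()
                    trial[label] = t                         # vary only this label's threshold
                    return metric(y_true, (y_prob >= trial).astype(float))
                thresholds[label] = max(candidates, key=score)
            return metric(y_true, (y_prob >= thresholds).astype(float)), thresholds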

        # load the best model
        model.load_state_dict(torch.load(directory+bfile))
        model.eval()

        loss = 0.0 # For plotting
        references = None
        predicts = None

        for data in test_loader_dataset:
            src_seqs, trg_seqs = data    
            src_seqs, trg_seqs = src_seqs.to(device), trg_seqs.to(device)

            outputs = model(src_seqs, trg_seqs)

            # print(outputs)
            outputs = outputs.to(device)
            loss += criterion(outputs, trg_seqs).item()

            reference = flattern_result(trg_seqs.cpu().numpy())
            predict = flattern_result(outputs.detach().cpu().numpy())

            if references is None or predicts is None:
                references = reference
                predicts = predict
            else:
                references = np.append(references, reference, axis=0)
                predicts = np.append(predicts, predict, axis=0)
                # print('p', p)
                # print('r', r)
            if result_file and result_file != '':
                # NOTE: this logging block was syntactically broken in the original;
                # the loop below is a best-effort reconstruction that walks the
                # per-utterance (reference, prediction) rows of this batch. It still
                # references the raw test utterances (X_test[i][j]) and indices i, j,
                # which are not defined in this scope and must be supplied elsewhere.
                for r, p in zip(reference, predict):
                    out = (out + str(len(predict)) + ','
                           + str(len(X_test[i][j].split())) + ',"' + X_test[i][j] + '",'
                           + vector2tags(r, ms_tags) + ',' + vector2tags(p, ms_tags) + ','
                           + str(hamming_score(r, p)) + ','
                           + str(f1(r, p)[0]) + ',' + str(f1(r, p)[1]) + ',' + str(f1(r, p)[2]) + '\n')

        tloss = loss / len(test_loader_dataset)
        if not tuning:
            print('average test loss: ', tloss)

        torch.cuda.empty_cache()

        predictions = []

        for j in range(len(predicts)):
            predictions.append(ret_predict(predicts[j], thresholds))

        # print(predictions)

        references = np.array(references)
        predictions = np.array(predictions)

        acc = hamming_score(y_true=references, y_pred=predictions)
        f1_scores = f1(y_true=references, y_pred=predictions)

        scores = str(acc) + ',' + ','.join([str(x) for x in f1_scores])
        print('Test Accuracy, Precision, Recall and F1 score: ', scores)
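
        # Hedged sketch (illustrative only): hamming_score and f1 are defined
        # elsewhere in the project. A common multi-label "Hamming score" is the
        # mean per-sample intersection-over-union of predicted and reference
        # label sets; the project's own helper may use a different definition.
        def _sketch_hamming_score(y_true, y_pred):
            sample_scores = []
            for t, p in zip(y_true, y_pred):
                true_set = set(np.where(t == 1)[0])
                pred_set = set(np.where(p == 1)[0])
                if not true_set and not pred_set:
                    sample_scores.append(1.0)                # both empty: count as full agreement
                else:
                    sample_scores.append(len(true_set & pred_set) / len(true_set | pred_set))
            return np.mean(sample_scores)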
        # f1 = f1_score(y_true=references, y_pred=predicts, average='weighted')
        # print('weighted F1 score: ', f1)
        
        # print('weighted F1 score by chance: ', f1_score(y_true=references, y_pred=predicts_r, average='weighted'))
        if not tuning:
            print('Tag',':','Accuracy, (Precision, Recall, F1)')
            for i in range(predictions.shape[1]):
                predictions_t = np.array([[p[i]] for p in predictions])
                references_t = np.array([[r[i]] for r in references])
                print(ms_tags[i], ':', hamming_score(y_true=references_t, y_pred=predictions_t), ',', f1(y_true=references_t, y_pred=predictions_t))

            if result_file and result_file != '':
                outf.write(out)

            if loss_file and loss_file != '':
                lfile.write(lout)

        return {'loss': -acc, 'status': STATUS_OK }