예제 #1
0
def train(data):
    """Train a model, keep the best dev-set weights, and score them on the test splits.

    Steps performed:

    1. Save ``data`` to ``<data.model_dir>.dset``.
    2. Build ``SentClassifier`` or ``SeqLabel`` depending on
       ``data.sentence_classification`` and the optimizer named by
       ``data.optimizer`` (matched case-insensitively).
    3. Run ``data.HP_iteration`` epochs of mini-batch training, evaluating on
       "dev" after every epoch and remembering (in memory) the state dict of
       the best-scoring epoch. Every 10th epoch the three test splits are
       also evaluated and printed.
    4. Write the best state dict to ``<data.model_dir>.<epoch>.model``, load it
       back into the model and evaluate it on "test", "test_augment" and
       "test_harder".

    Args:
        data: Project configuration/dataset object. The attributes read here
            are ``model_dir``, ``sentence_classification``, ``optimizer``,
            ``HP_lr``, ``HP_momentum``, ``HP_l2``, ``HP_lr_decay``,
            ``HP_iteration``, ``HP_batch_size``, ``HP_gpu``, ``seg`` and
            ``train_Ids`` (shuffled in place every epoch).

    Returns:
        The tuple ``(best_dev, acc, p, r, f, acc_a, p_a, r_a, f_a, acc_h, p_h,
        r_h, f_h, dev_info)`` where the unsuffixed, ``_a`` and ``_h`` metrics
        come from "test", "test_augment" and "test_harder" respectively and
        ``dev_info`` maps ``'acc'/'p'/'r'/'f'`` to per-epoch dev values.

    Raises:
        SystemExit: With status 1 when the optimizer name is unknown or the
            accumulated loss exceeds 1e8 / becomes NaN.
    """
    import math  # local import: only needed for the NaN checks below

    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Optimizers that share the (lr, weight_decay) signature; SGD additionally
    # takes a momentum term and is handled separately.
    plain_optimizers = {
        "adagrad": optim.Adagrad,
        "adadelta": optim.Adadelta,
        "rmsprop": optim.RMSprop,
        "adam": optim.Adam,
    }
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name in plain_optimizers:
        optimizer = plain_optimizers[optimizer_name](model.parameters(),
                                                     lr=data.HP_lr,
                                                     weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        sys.exit(1)

    best_dev = -10
    # Both stay None until some epoch beats ``best_dev``; the final save is
    # skipped in that case instead of failing on an unbound name.
    best_model_dict = None
    model_name = None

    dev_info = {'acc': [], 'p': [], 'f': [], 'r': []}

    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # NOTE(review): this comparison is case-sensitive, unlike the optimizer
        # selection above, so the decay only runs when the configured name is
        # exactly "SGD" - confirm that this is intended.
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0  # loss accumulated since the last progress report
        total_loss = 0  # loss accumulated over the whole epoch
        right_token = 0
        whole_token = 0
        random.shuffle(data.train_Ids)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple the extra batch is empty and skipped below.
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu, True,
                                         data.sentence_classification)
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            batch_loss = loss.item()
            sample_loss += batch_loss
            total_loss += batch_loss
            # Progress report whenever the running instance count hits a
            # multiple of 500.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or math.isnan(sample_loss):
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    sys.exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or math.isnan(total_loss):
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            sys.exit(1)

        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_info['acc'].append(acc)
        dev_info['p'].append(p)
        dev_info['r'].append(r)
        dev_info['f'].append(f)

        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        # Model selection uses the f score when data.seg is set, accuracy
        # otherwise.
        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            model_name = data.model_dir + '.' + str(idx) + ".model"

            # Snapshot the weights now; they are written to disk only once,
            # after the last epoch.
            best_model_dict = deepcopy(model.state_dict())
            best_dev = current_score

        # ## decode test
        if idx % 10 == 0:
            evaluate_and_print(data, model, 'test')
            evaluate_and_print(data, model, 'test_augment')
            evaluate_and_print(data, model, 'test_harder')

        gc.collect()

    # add test for best model
    print("======BEST MODEL TEST======")
    if best_model_dict is None:
        # No epoch ran or none improved on the initial score (e.g. NaN dev
        # metric): there is nothing to save, so evaluate the weights as-is.
        print("No improving epoch recorded; evaluating the current model "
              "without saving.")
    else:
        print("Save current best model in file:", model_name)
        torch.save(best_model_dict, model_name)
        model.load_state_dict(best_model_dict)

    acc, p, r, f = evaluate_and_print_return(data, model, 'test')
    acc_a, p_a, r_a, f_a = evaluate_and_print_return(data, model,
                                                     'test_augment')
    acc_h, p_h, r_h, f_h = evaluate_and_print_return(data, model,
                                                     'test_harder')
    print("======BEST DEV=======: {}".format(best_dev))

    return best_dev, acc, p, r, f, acc_a, p_a, r_a, f_a, acc_h, p_h, r_h, f_h, dev_info
def train(data):
    """Train a model, saving the best dev-set weights and logging metrics per epoch.

    The function saves ``data`` to ``<data.model_dir>.dset``, builds a
    ``SentClassifier`` or ``SeqLabel`` depending on
    ``data.sentence_classification``, and runs ``data.HP_iteration`` epochs of
    mini-batch training. After every epoch the model is evaluated on "dev" and
    "test"; whenever the dev score improves the state dict is written to
    ``<data.model_dir>.best.model``. When ``data.output_tsv_path`` is set, a
    header (``TSV_HEADER``) and one "Dev" plus one "Test" row per epoch are
    written to that file.

    Args:
        data: Project configuration/dataset object. The attributes read here
            are ``model_dir``, ``output_tsv_path``,
            ``sentence_classification``, ``optimizer``, ``HP_lr``,
            ``HP_momentum``, ``HP_l2``, ``HP_lr_decay``, ``HP_iteration``,
            ``HP_batch_size``, ``HP_gpu``, ``seg``, ``tagScheme``,
            ``random_seed`` and ``train_Ids`` (shuffled in place every epoch).

    Returns:
        None.

    Raises:
        SystemExit: With status 1 when the optimizer name is unknown or the
            accumulated loss exceeds 1e8 / becomes NaN. The TSV file is closed
            before the exit propagates.
    """
    import math  # local import: only needed for the NaN checks below

    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)

    output_tsv = None
    if data.output_tsv_path:
        # Line buffering: the stream is flushed after every newline.
        output_tsv = open(data.output_tsv_path, "w", buffering=1)

    def write_tsv_row(epoch, split, epoch_loss, metrics, internal_metrics):
        """Append one metrics row to the TSV file, if one is open."""
        if output_tsv is None:
            return
        row = [data.tagScheme, data.random_seed, epoch, split, epoch_loss]
        row.extend(metrics)
        row.extend(internal_metrics)
        print("\t".join(str(item) for item in row), file=output_tsv)

    # try/finally guarantees the TSV handle is released on every exit path,
    # including sys.exit() and exceptions raised during training.
    try:
        if output_tsv is not None:
            print("\t".join(TSV_HEADER), file=output_tsv)

        if data.sentence_classification:
            model = SentClassifier(data)
        else:
            model = SeqLabel(data)

        # Optimizers that share the (lr, weight_decay) signature; SGD
        # additionally takes a momentum term and is handled separately.
        plain_optimizers = {
            "adagrad": optim.Adagrad,
            "adadelta": optim.Adadelta,
            "rmsprop": optim.RMSprop,
            "adam": optim.Adam,
        }
        optimizer_name = data.optimizer.lower()
        if optimizer_name == "sgd":
            optimizer = optim.SGD(model.parameters(),
                                  lr=data.HP_lr,
                                  momentum=data.HP_momentum,
                                  weight_decay=data.HP_l2)
        elif optimizer_name in plain_optimizers:
            optimizer = plain_optimizers[optimizer_name](
                model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
        else:
            print("Optimizer illegal: %s" % (data.optimizer))
            sys.exit(1)

        best_dev = -10
        ## start training
        for idx in range(data.HP_iteration):
            epoch_start = time.time()
            temp_start = epoch_start
            print("Epoch: %s/%s" % (idx, data.HP_iteration))
            # NOTE(review): this comparison is case-sensitive, unlike the
            # optimizer selection above, so the decay only runs when the
            # configured name is exactly "SGD" - confirm this is intended.
            if data.optimizer == "SGD":
                optimizer = lr_decay(optimizer, idx, data.HP_lr_decay,
                                     data.HP_lr)
            sample_loss = 0  # loss accumulated since the last progress report
            total_loss = 0  # loss accumulated over the whole epoch
            right_token = 0
            whole_token = 0
            random.shuffle(data.train_Ids)
            print("Shuffle: first input word list:", data.train_Ids[0][0])
            ## set model in train model
            model.train()
            model.zero_grad()
            batch_size = data.HP_batch_size
            train_num = len(data.train_Ids)
            # "+ 1" covers a trailing partial batch; when train_num is an
            # exact multiple the extra batch is empty and skipped below.
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                start = batch_id * batch_size
                end = min((batch_id + 1) * batch_size, train_num)
                instance = data.train_Ids[start:end]
                if not instance:
                    continue
                (batch_word, batch_features, batch_wordlen, batch_wordrecover,
                 batch_char, batch_charlen, batch_charrecover, batch_label,
                 mask) = batchify_with_label(instance, data.HP_gpu, True,
                                             data.sentence_classification)
                loss, tag_seq = model.calculate_loss(batch_word,
                                                     batch_features,
                                                     batch_wordlen, batch_char,
                                                     batch_charlen,
                                                     batch_charrecover,
                                                     batch_label, mask)
                right, whole = predict_check(tag_seq, batch_label, mask,
                                             data.sentence_classification)
                right_token += right
                whole_token += whole
                batch_loss = loss.item()
                sample_loss += batch_loss
                total_loss += batch_loss
                # Progress report whenever the running instance count hits a
                # multiple of 500.
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, temp_cost, sample_loss, right_token,
                           whole_token, (right_token + 0.) / whole_token))
                    if sample_loss > 1e8 or math.isnan(sample_loss):
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        sys.exit(1)
                    sys.stdout.flush()
                    sample_loss = 0
                loss.backward()
                optimizer.step()
                model.zero_grad()
            temp_time = time.time()
            temp_cost = temp_time - temp_start
            print(
                "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                (end, temp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.) / whole_token))

            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            print(
                "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
                % (idx, epoch_cost, train_num / epoch_cost, total_loss))
            print("totalloss:", total_loss)
            if total_loss > 1e8 or math.isnan(total_loss):
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                sys.exit(1)

            speed, (acc, p, r,
                    f), (internal_acc, internal_p, internal_r,
                         internal_f), _, _ = evaluate(data, model, "dev")
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish

            # Model selection uses the f score when data.seg is set, accuracy
            # otherwise.
            if data.seg:
                current_score = f
                print(
                    "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (dev_cost, speed, acc))

            if current_score > best_dev:
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                model_name = data.model_dir + ".best.model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score

            write_tsv_row(idx + 1, "Dev", total_loss, (acc, p, r, f),
                          (internal_acc, internal_p, internal_r, internal_f))

            # ## decode test
            speed, (acc, p, r,
                    f), (internal_acc, internal_p, internal_r,
                         internal_f), _, _ = evaluate(data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            if data.seg:
                print(
                    "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
            else:
                print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                      (test_cost, speed, acc))

            write_tsv_row(idx + 1, "Test", total_loss, (acc, p, r, f),
                          (internal_acc, internal_p, internal_r, internal_f))
            gc.collect()
    finally:
        if output_tsv is not None:
            output_tsv.close()
예제 #3
0
파일: main.py 프로젝트: aghie/disco2labels
def train(data):
    """Train a multi-task sequence labelling model with optional tree-level model selection.

    The function saves ``data`` to ``<data.model_dir>.dset``, builds a
    ``SentClassifier`` or ``SeqLabel`` depending on
    ``data.sentence_classification``, and runs ``data.HP_iteration`` epochs of
    mini-batch training over ``data.HP_tasks`` tasks. After every epoch:

    * the model is evaluated on "dev" and the decoded output is written via
      ``data.write_decoded_results`` to a temporary path;
    * when ``data.optimize_with_evalb`` is set, that output is converted to
      trees by ``data.disco_decode_script`` and scored with ``discodop eval``
      (if ``data.disco_encoder`` is set) or ``data.evalb``; otherwise the mean
      per-task tagging score is used;
    * if the score beats the best so far the state dict is written to
      ``<data.model_dir>.model``;
    * the model is evaluated on "test" and, when ``data.log_file`` is set, a
      row ``epoch<TAB>score<TAB>disco-score`` is appended to it.

    Args:
        data: Project configuration/dataset object (see the attributes read
            in the body). ``data.train_Ids`` is shuffled in place every epoch
            and ``data.decode_dir`` is overwritten with a temporary path.

    Returns:
        None.

    Raises:
        SystemExit: With status 1 when the optimizer name is unknown or an
            accumulated loss exceeds 1e8 / becomes NaN.
        IndexError: When the external evaluation output does not contain the
            expected score line.
    """
    import math  # local import: only needed for the NaN checks below

    def run_shell(command):
        """Join ``command`` with spaces, run it through the shell, return decoded stdout."""
        # NOTE(review): arguments are joined unquoted and run with shell=True,
        # so paths containing spaces or shell metacharacters will break -
        # acceptable only while all inputs come from trusted configuration.
        process = subprocess.Popen(" ".join(command),
                                   stdout=subprocess.PIPE,
                                   shell=True)
        out, _ = process.communicate()
        return out.decode("utf-8")

    def first_line_with_prefix(output, prefix):
        """Return the first line of ``output`` starting with ``prefix`` (IndexError if none)."""
        return [l for l in output.split("\n") if l.startswith(prefix)][0]

    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
        print(model)

    # Optimizers that share the (lr, weight_decay) signature; SGD additionally
    # takes a momentum term and is handled separately.
    plain_optimizers = {
        "adagrad": optim.Adagrad,
        "adadelta": optim.Adadelta,
        "rmsprop": optim.RMSprop,
        "adam": optim.Adam,
    }
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name in plain_optimizers:
        optimizer = plain_optimizers[optimizer_name](model.parameters(),
                                                     lr=data.HP_lr,
                                                     weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        sys.exit(1)

    best_dev = -sys.maxsize - 1
    best_dev_only_disco = -sys.maxsize - 1
    current_score_disco = -sys.maxsize - 1

    ## start training
    last_output_path = None
    if data.log_file is not None:
        # The log is re-opened in append mode once per epoch, so no handle
        # stays open (and leaks) across the whole training run.
        with open(data.log_file, "w") as f_log:
            f_log.write("\t".join(["Epoch", "F-Score", "F-Score-disco"]) +
                        "\n")
        last_output_path = data.log_file + ".last_output"
        # Create/truncate the companion file that receives a copy of the
        # latest decoded dev output.
        with open(last_output_path, "w"):
            pass

    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # NOTE(review): this comparison is case-sensitive, unlike the optimizer
        # selection above, so the decay only runs when the configured name is
        # exactly "SGD" - confirm that this is intended.
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        total_loss = 0

        # Per-task running statistics, keyed by task index.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)

        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers a trailing partial batch; when train_num is an exact
        # multiple the extra batch is empty and skipped below.
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = min((batch_id + 1) * batch_size, train_num)
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(instance, data.HP_gpu, False, False)
            loss, losses, tag_seq = model.calculate_loss(batch_word,
                                                         batch_features,
                                                         batch_wordlen,
                                                         batch_char,
                                                         batch_charlen,
                                                         batch_charrecover,
                                                         batch_label,
                                                         mask,
                                                         inference=False)

            # Measure the reporting interval once per batch so that every
            # task reports the same elapsed time (resetting the timer inside
            # the task loop made all tasks after the first report ~0s).
            report_progress = end % 500 == 0
            if report_progress:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time

            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask],
                                             batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole

                if report_progress:
                    print(
                        "     Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, idtask, temp_cost, sample_loss[idtask],
                           right_token[idtask], whole_token[idtask],
                           (right_token[idtask] + 0.) / whole_token[idtask]))
                    # Check this task's own loss (not the whole dict) and
                    # signal the failure with a non-zero exit status.
                    if sample_loss[idtask] > 1e8 or math.isnan(
                            sample_loss[idtask]):
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        sys.exit(1)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0

            if report_progress:
                print(
                    "--------------------------------------------------------------------------"
                )

            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start

        for idtask in range(data.HP_tasks):
            print(
                "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                (end, temp_cost, sample_loss[idtask], right_token[idtask],
                 whole_token[idtask],
                 (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or math.isnan(total_loss):
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            sys.exit(1)

        init_eval_time = time.time()
        summary = evaluate(data, model, "dev", False, False)
        print("Evaluation time {}".format(time.time() - init_eval_time))

        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        # One pass over the per-task dev summaries: print the metrics and
        # collect the predictions for decoding.
        current_scores = []
        pred_results_tasks = []
        pred_scores_tasks = []
        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, pred_results, pred_scores = summary[idtask]
            if data.seg:
                current_scores.append(f)
                print(
                    "Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (idtask, dev_cost, speed, acc, p, r, f))
            else:
                current_scores.append(acc)
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (idtask, dev_cost, speed, acc))
            pred_results_tasks.append(pred_results)
            # Collect this task's scores (previously the list was appended to
            # itself); they are not consumed further in this function.
            pred_scores_tasks.append(pred_scores)

        # NOTE(review): only the *name* of a temporary file is kept here; the
        # NamedTemporaryFile object itself is discarded immediately.
        data.decode_dir = tempfile.NamedTemporaryFile().name
        data.write_decoded_results(pred_results_tasks, 'dev')

        if last_output_path is not None:
            copyfile(data.decode_dir, last_output_path)

        if data.optimize_with_evalb:

            tmp_trees_file = tempfile.NamedTemporaryFile()
            # Empty strings stand in for disabled flags; they only add extra
            # spaces to the joined shell command.
            command = [
                "python",
                data.disco_decode_script,  #"decode.py ",
                "--input",
                data.decode_dir,
                "--output",
                tmp_trees_file.name,
                "--disc" if data.disco_encoder is not None else "",
                "--split_char",
                data.label_split_char,
                "--os" if data.dummy_os else "",
                ("--disco_encoder " + data.disco_encoder)
                if data.disco_encoder is not None else "",
                "" if not data.add_leaf_unary_column else
                "--add_leaf_unary_column",
                ("--path_reduced_tagset " + data.path_reduced_tagset)
                if data.path_reduced_tagset is not None else ""
            ]
            # Run for its side effect of writing the trees file; the stdout
            # of the decode script is not used.
            run_shell(command)

            if data.disco_encoder is not None:
                out = run_shell([
                    "discodop", "eval", data.gold_dev_trees,
                    tmp_trees_file.name, data.evalb_param_file, "--fmt",
                    "discbracket"
                ])
                current_score = float(
                    first_line_with_prefix(
                        out, "labeled f-measure:").rsplit(" ", 1)[1])

                #Computing the score for discontinuous trees only
                out = run_shell([
                    "discodop", "eval", data.gold_dev_trees,
                    tmp_trees_file.name, data.evalb_param_file, "--fmt",
                    "discbracket", "--disconly"
                ])
                current_score_disco = float(
                    first_line_with_prefix(
                        out, "labeled f-measure:").rsplit(" ", 1)[1])

            else:

                command = [
                    data.evalb, tmp_trees_file.name, data.gold_dev_trees
                ]
                #For legacy with how previous models were trained
                if data.evalb_param_file is not None:
                    command.extend(["-p", data.evalb_param_file])

                out = run_shell(command)
                current_score = float(
                    first_line_with_prefix(
                        out, "Bracketing FMeasure").split("=")[1])

        os.remove(data.decode_dir)

        tagging_score = sum(current_scores) / len(current_scores)
        print("The tagging accuracy is:", tagging_score)

        if not data.optimize_with_evalb:
            current_score = tagging_score

        print("The overall dev score for this epoch is: {} ".format(
            current_score))
        print("The overall previous best dev score was: {} ".format(best_dev))
        if data.disco_encoder is not None:
            print(
                "The dev score for this continuous trees in this epoch is: {}".
                format(current_score_disco))
            print("The previous discontinuous score of the best model is: {} ".
                  format(best_dev_only_disco))

        if current_score > best_dev:
            model_name = data.model_dir + ".model"
            print("Overwriting model in", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
            best_dev_only_disco = current_score_disco

        summary = evaluate(data, model, "test", False)

        test_finish = time.time()
        test_cost = test_finish - dev_finish

        for idtask in range(0, data.HP_tasks):
            speed, acc, p, r, f, _, _ = summary[idtask]
            if data.seg:
                print(
                    "Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (idtask, test_cost, speed, acc, p, r, f))
            else:
                print("Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (idtask, test_cost, speed, acc))

        if data.log_file is not None:
            with open(data.log_file, "a") as f_log:
                f_log.write("{}\t{}\t{}\n".format(idx, current_score,
                                                  current_score_disco))

        gc.collect()
예제 #4
0
파일: main.py 프로젝트: mahatmaWM/NCRFpp
def train(data):
    """Run the complete training procedure described by ``data``.

    Saves ``data`` to ``<data.model_dir>.dset``, builds ``SentClassifier`` or
    ``SeqLabel`` (chosen by ``data.sentence_classification``) together with
    the optimizer named by ``data.optimizer``, then trains for
    ``data.HP_iteration`` epochs. Each epoch ends with an evaluation on "dev"
    and "test"; whenever the dev score improves, the state dict is written to
    ``<data.model_dir>.<epoch>.model``. Progress is reported through
    ``logging.info``.

    Args:
        data: Project configuration/dataset object; ``data.train_Ids`` is
            shuffled in place at the start of every epoch.

    Returns:
        None. The process exits with status 1 on an unknown optimizer name or
        when the accumulated loss exceeds 1e8 / becomes NaN.
    """
    logging.info("Training model...")
    data.show_data_summary()
    data.save(data.model_dir + ".dset")

    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Every optimizer except SGD is built from the same two hyper-parameters.
    two_param_optimizers = {
        "adagrad": optim.Adagrad,
        "adadelta": optim.Adadelta,
        "rmsprop": optim.RMSprop,
        "adam": optim.Adam,
    }
    requested = data.optimizer.lower()
    if requested == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif requested in two_param_optimizers:
        optimizer = two_param_optimizers[requested](model.parameters(),
                                                    lr=data.HP_lr,
                                                    weight_decay=data.HP_l2)
    else:
        logging.info("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    best_dev = -10
    for epoch in range(data.HP_iteration):
        epoch_began = time.time()
        last_report = epoch_began
        logging.info("Epoch: %s/%s" % (epoch, data.HP_iteration))
        # The decay is applied only for the exact spelling "SGD".
        if data.optimizer == "SGD":
            optimizer = lr_decay(optimizer, epoch, data.HP_lr_decay,
                                 data.HP_lr)

        window_loss = 0  # loss since the previous progress line
        epoch_loss = 0  # loss over the whole epoch
        correct = 0
        seen = 0
        random.shuffle(data.train_Ids)
        logging.info("Shuffle: first input word list:%s" %
                     data.train_Ids[0][0])

        # Put the model into training mode and clear stale gradients.
        model.train()
        model.zero_grad()
        step = data.HP_batch_size
        n_train = len(data.train_Ids)
        n_batches = n_train // step + 1
        for batch_index in range(n_batches):
            lo = batch_index * step
            hi = min(lo + step, n_train)

            # The inputs that make up one batch.
            batch = data.train_Ids[lo:hi]
            if not batch:
                continue
            (batch_word, batch_features, batch_wordlen, batch_wordrecover,
             batch_char, batch_charlen, batch_charrecover, batch_label,
             mask) = batchify_with_label(batch, data.HP_gpu, True,
                                         data.sentence_classification)
            loss, tag_seq = model.neg_log_likelihood_loss(
                batch_word, batch_features, batch_wordlen, batch_char,
                batch_charlen, batch_charrecover, batch_label, mask)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            correct += right
            seen += whole
            loss_value = loss.item()
            window_loss += loss_value
            epoch_loss += loss_value

            if hi % 50000 == 0:
                now = time.time()
                elapsed = now - last_report
                last_report = now
                logging.info(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (hi, elapsed, window_loss, correct, seen,
                       (correct + 0.) / seen))
                if window_loss > 1e8 or str(window_loss) == "nan":
                    logging.info(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                window_loss = 0

            loss.backward()
            optimizer.step()
            model.zero_grad()

        # Final progress line for whatever was accumulated after the last
        # in-loop report.
        elapsed = time.time() - last_report
        logging.info(
            "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
            (hi, elapsed, window_loss, correct, seen, (correct + 0.) / seen))

        epoch_ended = time.time()
        epoch_seconds = epoch_ended - epoch_began
        logging.info(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (epoch, epoch_seconds, n_train / epoch_seconds, epoch_loss))
        logging.info("totalloss: %s" % epoch_loss)
        if epoch_loss > 1e8 or str(epoch_loss) == "nan":
            logging.info(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)

        # Dev evaluation; the selection metric is f when data.seg is set and
        # accuracy otherwise.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_ended = time.time()
        dev_seconds = dev_ended - epoch_ended
        if data.seg:
            current_score = f
            logging.info(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_seconds, speed, acc, p, r, f))
        else:
            current_score = acc
            logging.info("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                         (dev_seconds, speed, acc))

        if current_score > best_dev:
            if data.seg:
                logging.info("Exceed previous best f score: %s" % best_dev)
            else:
                logging.info("Exceed previous best acc score:%s" % best_dev)
            checkpoint_path = data.model_dir + '.' + str(epoch) + ".model"
            logging.info("Save current best model in file: %s" %
                         checkpoint_path)
            torch.save(model.state_dict(), checkpoint_path)
            best_dev = current_score

        # Test evaluation, reported after every epoch.
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_seconds = time.time() - dev_ended
        if data.seg:
            logging.info(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_seconds, speed, acc, p, r, f))
        else:
            logging.info("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                         (test_seconds, speed, acc))
        gc.collect()
예제 #5
0
def train(data):
    """Train a model on ``data`` with per-epoch dev/test evaluation.

    A ``SentClassifier`` is built when ``data.sentence_classification`` is
    truthy, otherwise a ``SeqLabel``.  The optimizer is selected by
    ``data.optimizer`` (case-insensitive: sgd, adagrad, adadelta, rmsprop,
    adam).  Up to ``data.HP_iteration`` epochs are run; after each one the
    model is evaluated on the "dev" and "test" splits and its state dict is
    saved to ``<model_dir>.dev.model`` / ``<model_dir>.test.model`` whenever
    the corresponding score improves.  The score is ``f`` when ``data.seg``
    is truthy and ``acc`` otherwise.

    Training stops early once the *test* score has not improved for
    ``data.patience`` consecutive epochs.

    Args:
        data: Project data/configuration object providing the hyper-parameters
            (``HP_*``), training instances (``train_Ids``, ``train_texts``)
            and output locations (``model_dir``) read below.

    Side effects:
        Writes ``<model_dir>.dset`` and model checkpoints, shuffles
        ``data.train_Ids`` / ``data.train_texts`` in place, prints progress
        to stdout, and terminates the process with status 1 on an unknown
        optimizer name or when the loss exceeds 1e8 / becomes NaN.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Normalise once so optimizer selection and the LR-decay check below
    # agree on the spelling of the optimizer name.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    def freeze_net(model):
        """Disable gradient updates for the word-embedding parameters."""
        for p in model.word_hidden.wordrep.word_embedding.parameters():
            p.requires_grad = False

    # Only an explicit False freezes the embeddings (kept as in the original).
    if data.tune_wordemb == False:
        freeze_net(model)

    best_dev = -10
    best_test = -10
    bad_counter = 0  # epochs in a row without a test-score improvement
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # Previously compared against the literal "SGD", so a lower-case
        # "sgd" setting selected SGD above but never decayed its LR.
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0
        # Shuffle ids and texts together so they stay aligned; slice
        # assignment keeps the original list objects.
        cc = list(zip(data.train_Ids, data.train_texts))
        random.shuffle(cc)
        data.train_Ids[:], data.train_texts[:] = zip(*cc)
        print("Shuffle: first input word list:", data.train_Ids[0][0])
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers a trailing partial batch; an empty trailing slice is
        # skipped inside the loop.
        total_batch = train_num // batch_size + 1
        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            instance_text = data.train_texts[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_elmo_char, batch_adj = batchify_with_label(
                instance, instance_text, data.HP_gpu, True,
                data.sentence_classification)
            loss, tag_seq = model.calculate_loss(batch_word, batch_features,
                                                 batch_wordlen, batch_char,
                                                 batch_charlen,
                                                 batch_charrecover,
                                                 batch_label, mask,
                                                 batch_elmo_char, batch_adj)
            right, whole = predict_check(tag_seq, batch_label, mask,
                                         data.sentence_classification)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            total_loss += loss.item()
            # Progress report whenever the instance counter hits a multiple
            # of 500; sample_loss is the loss accumulated since the last one.
            if end % 500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time
                print(
                    "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                    % (end, temp_cost, sample_loss, right_token, whole_token,
                       (right_token + 0.) / whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    print(
                        "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                    )
                    exit(1)
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
              (end, temp_cost, sample_loss, right_token, whole_token,
               (right_token + 0.) / whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)

        ## decode dev
        speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.seg:
            current_score = f
            print(
                "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (dev_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                  (dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best dev f score:", best_dev)
            else:
                print("Exceed previous best dev acc score:", best_dev)
            # A single file is overwritten on every dev improvement.
            model_name = data.model_dir + ".dev.model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score

        ## decode test
        speed, acc, p, r, f, _, _ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            current_score = f
            print(
                "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                % (test_cost, speed, acc, p, r, f))
        else:
            current_score = acc
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                  (test_cost, speed, acc))

        # NOTE(review): patience is driven by the test score, not the dev
        # score -- kept as in the original; confirm this is intended.
        if current_score > best_test:
            if data.seg:
                print("Exceed previous best test f score:", best_test)
            else:
                print("Exceed previous best test acc score:", best_test)

            model_name = data.model_dir + ".test.model"
            torch.save(model.state_dict(), model_name)
            best_test = current_score

            bad_counter = 0
        else:
            bad_counter += 1

        gc.collect()

        if bad_counter >= data.patience:
            print('Early Stop!')
            break
예제 #6
0
def train(data):
    """Train a model by alternating Target-domain and Source-domain batches.

    A ``SentClassifier`` or ``SeqLabel`` model (chosen by
    ``data.sentence_classification``) is created and moved to CUDA when it is
    available and ``data.HP_gpu`` is truthy, otherwise to the CPU.  Steps with
    an even ``total_step`` draw a batch from the Target dataset
    (``data.train_Ids_T``), odd steps from the Source dataset
    (``data.train_Ids_S``).

    An epoch ends when the Target dataset reports its end: the model is then
    evaluated on the Target "dev" and "test" splits and the state dict is
    saved to ``<model_dir>.model`` if the dev score (``f`` when ``data.seg``,
    else ``acc``) improved.  When the Source dataset reports its end, only a
    Source "test" evaluation is printed.  Training runs for
    ``data.HP_iteration`` such epochs.

    Args:
        data: Project data/configuration object providing the hyper-parameters
            (``HP_*``), both training sets and ``model_dir``.

    Side effects:
        Writes ``<model_dir>.dset`` and ``<model_dir>.model``, prints
        progress to stdout, and terminates the process with status 1 on an
        unknown optimizer name or when the epoch loss exceeds 1e8 / is NaN.
    """
    print("Training model...")
    device = torch.device(
        'cuda' if torch.cuda.is_available() and data.HP_gpu else 'cpu')
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data).to(device)
    else:
        model = SeqLabel(data).to(device)

    ## compute model parameter num
    n_all_param = sum(p.nelement() for p in model.parameters())
    n_emb_param = sum(
        p.nelement() for p in (
            model.word_hidden.wordrep.word_embedding.weight,
            model.word_hidden.wordrep.char_feature.char_embeddings.weight))
    print("all parameters=%s, emb parameters=%s, other parameters=%s" %
          (n_all_param, n_emb_param, n_all_param - n_emb_param))

    # Normalise once so optimizer selection and the LR-decay check below
    # agree on the spelling of the optimizer name.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    best_dev = -10
    # One entry per finished epoch, so both lists can be indexed by epoch.
    test_f = []
    dev_f = []
    best_epoch = 0
    train_dataset_S = Multi_Task_Dataset(data.train_Ids_S, data.HP_batch_size)
    train_dataset_T = Multi_Task_Dataset(data.train_Ids_T, data.HP_batch_size)
    total_step = 0
    target_end, source_end = False, False
    epoch_idx = 0
    epoch_start = True  # this step is the start of an epoch
    ## start training
    while epoch_idx < data.HP_iteration:
        if epoch_start:
            epoch_start = False
            epoch_loss = 0
            epoch_start_time = time.time()
            print("Epoch: %s/%s" % (epoch_idx, data.HP_iteration))
            # Previously compared against the literal "SGD", so a lower-case
            # "sgd" setting selected SGD above but never decayed its LR.
            if optimizer_name == "sgd":
                optimizer = lr_decay(optimizer, epoch_idx, data.HP_lr_decay,
                                     data.HP_lr)
            model.train()
            model.zero_grad()

        # Even steps train on the Target domain, odd steps on the Source.
        if total_step % 2 == 0:
            domain_tag = 'Target'
            batch_instance, target_end = train_dataset_T.next_batch()
        else:
            domain_tag = 'Source'
            batch_instance, source_end = train_dataset_S.next_batch()
        # NOTE(review): an empty batch retries the same domain because
        # total_step is not advanced, and any end flag just returned is not
        # acted on -- presumably next_batch never returns an empty batch
        # repeatedly; confirm against Multi_Task_Dataset.
        if len(batch_instance) == 0:
            continue
        original_words_batch, batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, batch_entity, mask = \
            batchify_with_label(batch_instance, data.HP_gpu, True, data.sentence_classification)
        loss, entity_loss, atten_probs_loss = model.calculate_loss(
            original_words_batch, domain_tag, batch_word, batch_features,
            batch_wordlen, batch_char, batch_charlen, batch_charrecover,
            batch_label, batch_entity, mask)
        # Target-domain loss is re-weighted; Source-domain loss is not.
        rate = data.HP_target_loss_rate if domain_tag == "Target" else 1.0  # 2:1 for twitter 1.6:1 for bionlp 1.5:1 for broad twitter
        loss_ = rate * loss + entity_loss + atten_probs_loss
        epoch_loss += loss_.item()
        loss_.backward()
        optimizer.step()
        model.zero_grad()
        total_step += 1

        ## evaluation
        if target_end:
            epoch_finish_time = time.time()
            epoch_cost = epoch_finish_time - epoch_start_time
            print("Epoch: %s training finished. Time: %.2fs" %
                  (epoch_idx, epoch_cost))
            print("totalloss:", epoch_loss)
            if epoch_loss > 1e8 or str(epoch_loss) == "nan":
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                exit(1)
            ## decode Target dev
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "dev")
            dev_finish_time = time.time()
            dev_cost = dev_finish_time - epoch_finish_time
            if data.seg:
                current_score = f
                print(
                    "Dev (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev (Target): time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (dev_cost, speed, acc))
            dev_f.append(current_score)

            if current_score > best_dev:
                best_epoch = epoch_idx
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                model_name = data.model_dir + ".model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score

            ## decode Target test
            speed, acc, p, r, f, _, _ = evaluate("Target", data, model, "test")
            test_finish_time = time.time()
            test_cost = test_finish_time - dev_finish_time
            if data.seg:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
                test_f.append(f)
            else:
                print(
                    "Test (Target): time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                    (test_cost, speed, acc))
                test_f.append(acc)
            gc.collect()
            print("The best f in epoch%s, dev:%.4f, test:%.4f" %
                  (best_epoch, dev_f[best_epoch], test_f[best_epoch]))
            ## epoch end set
            epoch_start = True
            target_end = False
            epoch_idx += 1

        if source_end:
            epoch_finish_time = time.time()
            ## decode test Source
            speed, acc, p, r, f, _, _ = evaluate("Source", data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - epoch_finish_time
            if data.seg:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f))
            else:
                print(
                    "Test (Source): time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                    (test_cost, speed, acc))
            source_end = False
예제 #7
0
def train(data):
    """Train a multi-task model and select checkpoints by dependency score.

    A ``SentClassifier`` or ``SeqLabel`` model (chosen by
    ``data.sentence_classification``) is trained for ``data.HP_iteration``
    epochs over ``data.HP_tasks`` tasks.  After each epoch the "dev"
    predictions of all tasks are written out via
    ``data.write_decoded_results``, decoded with
    ``decode_dependencies.decode`` and scored against ``data.gold_dev_dep``
    with ``decode_dependencies.evaluate_dependencies``.  The first returned
    score (reported as LAS in the log output) decides whether the state dict
    is saved to ``<model_dir>.model``; the second (reported as UAS) is only
    tracked and printed.  Per-task "test" metrics are printed afterwards.

    Args:
        data: Project data/configuration object providing the hyper-parameters
            (``HP_*``), training instances (``train_Ids``), the gold dev
            dependencies and ``model_dir``.

    Side effects:
        Writes ``<model_dir>.dset`` and ``<model_dir>.model``, shuffles
        ``data.train_Ids`` in place, may overwrite ``data.decode_dir`` with a
        temporary file path, prints progress to stdout, and terminates the
        process with status 1 on an unknown optimizer name or when the loss
        exceeds 1e8 / becomes NaN.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Normalise once so optimizer selection and the LR-decay check below
    # agree on the spelling of the optimizer name.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    best_dev = -10
    best_dev_uas = -10
    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" % (idx, data.HP_iteration))
        # Previously compared against the literal "SGD", so a lower-case
        # "sgd" setting selected SGD above but never decayed its LR.
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        total_loss = 0

        # Per-task running statistics, keyed by task index.
        sample_loss = {idtask: 0 for idtask in range(data.HP_tasks)}
        right_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        whole_token = {idtask: 0 for idtask in range(data.HP_tasks)}
        random.shuffle(data.train_Ids)
        ## set model in train model
        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+ 1" covers a trailing partial batch; an empty trailing slice is
        # skipped inside the loop.
        total_batch = train_num // batch_size + 1

        for batch_id in range(total_batch):
            start = batch_id * batch_size
            end = (batch_id + 1) * batch_size
            if end > train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                instance, data.HP_gpu, False, False)
            loss, losses, tag_seq = model.calculate_loss(batch_word,
                                                         batch_features,
                                                         batch_wordlen,
                                                         batch_char,
                                                         batch_charlen,
                                                         batch_charrecover,
                                                         batch_label,
                                                         mask,
                                                         inference=False)
            for idtask in range(data.HP_tasks):
                right, whole = predict_check(tag_seq[idtask],
                                             batch_label[idtask], mask)
                sample_loss[idtask] += losses[idtask].item()
                right_token[idtask] += right
                whole_token[idtask] += whole

                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        "     Instance: %s; Task %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        % (end, idtask, temp_cost, sample_loss[idtask],
                           right_token[idtask], whole_token[idtask],
                           (right_token[idtask] + 0.) / whole_token[idtask]))
                    # Fixed: the NaN test used to stringify the whole dict
                    # (never "nan"), the message was a Python 2 ``print``
                    # leftover that printed nothing, and the process exited
                    # with status 0 despite the error.
                    if sample_loss[idtask] > 1e8 or str(
                            sample_loss[idtask]) == "nan":
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        exit(1)
                    sys.stdout.flush()
                    sample_loss[idtask] = 0

            if end % 500 == 0:
                print(
                    "--------------------------------------------------------------------------"
                )

            total_loss += loss.item()
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start

        for idtask in range(data.HP_tasks):
            print(
                "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                (end, temp_cost, sample_loss[idtask], right_token[idtask],
                 whole_token[idtask],
                 (right_token[idtask] + 0.) / whole_token[idtask]))
        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print(
            "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
            % (idx, epoch_cost, train_num / epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            print(
                "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
            )
            exit(1)

        ## decode dev
        summary = evaluate(data, model, "dev", False, False)

        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        # Report per-task dev metrics and collect each task's predictions
        # for the dependency decoding step below.
        pred_results_tasks = []
        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, pred_results, _ = summary[idtask]
            pred_results_tasks.append(pred_results)
            if data.seg:
                print(
                    "Task %d Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (idtask, dev_cost, speed, acc, p, r, f))
            else:
                print("Task %d Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (idtask, dev_cost, speed, acc))

        with tempfile.NamedTemporaryFile() as f_decode_mt, \
                tempfile.NamedTemporaryFile() as f_decode_st:
            # With several main tasks the predictions always go to a fresh
            # temporary file; otherwise an already configured decode_dir is
            # reused and a temporary file is only the fallback.
            if len(data.index_of_main_tasks) > 1:
                data.decode_dir = f_decode_mt.name
            elif data.decode_dir is None:
                data.decode_dir = f_decode_st.name
            data.write_decoded_results(pred_results_tasks, 'dev')

            # NOTE(review): only the *name* of this temporary file is kept;
            # the file object is discarded immediately, so the path is not
            # cleaned up after decode() writes to it -- kept as in the
            # original, confirm whether explicit cleanup is wanted.
            tmp = tempfile.NamedTemporaryFile().name
            # Context manager so the predictions file is always closed
            # (the handle used to be leaked on every epoch).
            with open(data.decode_dir, encoding='utf-8') as output_nn:
                decode_dependencies.decode(output_nn, tmp)
            current_score, current_uas = decode_dependencies.evaluate_dependencies(
                data.gold_dev_dep, tmp)
            print("Current Score (from LAS)", current_score)
            print("Current Score (from UAS)", current_uas)

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score (from LAS):", best_dev)
            model_name = data.model_dir + ".model"
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        else:
            print("sofar the best (from LAS)" + repr(best_dev))
        if current_uas > best_dev_uas:
            if data.seg:
                print("Exceed previous best f score:", best_dev_uas)
            else:
                print("Exceed previous best acc score (from UAS):",
                      best_dev_uas)
            best_dev_uas = current_uas
        else:
            print("sofar the best (from UAS)" + repr(best_dev_uas))

        ## decode test
        summary = evaluate(data, model, "test", False)

        test_finish = time.time()
        test_cost = test_finish - dev_finish

        for idtask in range(data.HP_tasks):
            speed, acc, p, r, f, _, _ = summary[idtask]
            if data.seg:
                print(
                    "Task %d Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (idtask, test_cost, speed, acc, p, r, f))
            else:
                print("Task %d Test: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (idtask, test_cost, speed, acc))
        gc.collect()
예제 #8
0
def train(data):
    """Train a model, log the per-epoch f value and stop when it plateaus.

    A ``SentClassifier`` or ``SeqLabel`` model (chosen by
    ``data.sentence_classification``) is trained for at most
    ``data.HP_iteration`` epochs.  After each epoch the model is evaluated on
    the "dev" and "test" splits; the state dict is saved to
    ``<model_dir>.<epoch>.model`` whenever the dev score (``f`` when
    ``data.seg``, else ``acc``) improves.  One JSON line per epoch is written
    to ``data.log_dir``.  Training stops once the dev ``f`` value has changed
    by less than ``data.stopping_criterion`` for
    ``data.iters_without_change`` consecutive epochs.

    Args:
        data: Project data/configuration object providing the hyper-parameters
            (``HP_*``), training instances (``train_Ids``), ``model_dir``,
            ``log_dir`` and the stopping settings read below.

    Side effects:
        Writes ``<model_dir>.dset``, model checkpoints and the log file,
        shuffles ``data.train_Ids`` in place, prints progress to stdout, and
        terminates the process with status 1 on an unknown optimizer name or
        when the loss exceeds 1e8 / becomes NaN.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir + ".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)

    # Normalise once so optimizer selection and the LR-decay check below
    # agree on the spelling of the optimizer name.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(),
                              lr=data.HP_lr,
                              momentum=data.HP_momentum,
                              weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(),
                                   lr=data.HP_lr,
                                   weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(),
                                  lr=data.HP_lr,
                                  weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(),
                               lr=data.HP_lr,
                               weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s" % (data.optimizer))
        exit(1)

    best_dev = -10
    ## start training
    iters_without_change = 0  # consecutive epochs with a near-constant f
    previous_f = 0
    with open(data.log_dir, "w") as log_file:
        for idx in range(data.HP_iteration):
            epoch_start = time.time()
            temp_start = epoch_start
            print("Epoch: %s/%s" % (idx, data.HP_iteration))
            # Previously compared against the literal "SGD", so a lower-case
            # "sgd" setting selected SGD above but never decayed its LR.
            if optimizer_name == "sgd":
                optimizer = lr_decay(optimizer, idx, data.HP_lr_decay,
                                     data.HP_lr)
            sample_loss = 0
            total_loss = 0
            right_token = 0
            whole_token = 0
            random.shuffle(data.train_Ids)
            print("Shuffle: first input word list:", data.train_Ids[0][0])
            ## set model in train model
            model.train()
            model.zero_grad()
            batch_size = data.HP_batch_size
            train_num = len(data.train_Ids)
            # "+ 1" covers a trailing partial batch; an empty trailing slice
            # is skipped inside the loop.
            total_batch = train_num // batch_size + 1
            for batch_id in range(total_batch):
                start = batch_id * batch_size
                end = (batch_id + 1) * batch_size
                if end > train_num:
                    end = train_num
                instance = data.train_Ids[start:end]
                if not instance:
                    continue
                batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask = batchify_with_label(
                    instance, data.HP_gpu, True, data.sentence_classification)
                loss, tag_seq = model.calculate_loss(
                    batch_word, batch_features, batch_wordlen, batch_char,
                    batch_charlen, batch_charrecover, batch_label, mask)
                right, whole = predict_check(tag_seq, batch_label, mask,
                                             data.sentence_classification)
                right_token += right
                whole_token += whole
                sample_loss += loss.item()
                total_loss += loss.item()
                # Progress report whenever the instance counter hits a
                # multiple of 500; sample_loss is reset after each report.
                if end % 500 == 0:
                    temp_time = time.time()
                    temp_cost = temp_time - temp_start
                    temp_start = temp_time
                    print(
                        "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"
                        %
                        (end, temp_cost, sample_loss, right_token, whole_token,
                         (right_token + 0.) / whole_token))
                    if sample_loss > 1e8 or str(sample_loss) == "nan":
                        print(
                            "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                        )
                        exit(1)
                    sys.stdout.flush()
                    sample_loss = 0
                loss.backward()
                optimizer.step()
                model.zero_grad()
            temp_time = time.time()
            temp_cost = temp_time - temp_start
            print(
                "     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f" %
                (end, temp_cost, sample_loss, right_token, whole_token,
                 (right_token + 0.) / whole_token))

            epoch_finish = time.time()
            epoch_cost = epoch_finish - epoch_start
            print(
                "Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"
                % (idx, epoch_cost, train_num / epoch_cost, total_loss))
            print("totalloss:", total_loss)
            # Fixed: ``total_loss is np.nan`` is an identity test and is
            # never true for a NaN produced by arithmetic; use the same
            # string check as the per-sample test above.
            if total_loss > 1e8 or str(total_loss) == "nan":
                print(
                    "ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT...."
                )
                exit(1)

            ## decode dev
            speed, acc, p, r, f, _, _ = evaluate(data, model, "dev")
            dev_finish = time.time()
            dev_cost = dev_finish - epoch_finish

            if data.seg:
                current_score = f
                print(
                    "Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f" %
                      (dev_cost, speed, acc))

            if current_score > best_dev:
                if data.seg:
                    print("Exceed previous best f score:", best_dev)
                else:
                    print("Exceed previous best acc score:", best_dev)
                model_name = data.model_dir + '.' + str(idx) + ".model"
                print("Save current best model in file:", model_name)
                torch.save(model.state_dict(), model_name)
                best_dev = current_score

            ## decode test
            speed, acc, p, r, f_test, _, _ = evaluate(data, model, "test")
            test_finish = time.time()
            test_cost = test_finish - dev_finish
            if data.seg:
                print(
                    "Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"
                    % (test_cost, speed, acc, p, r, f_test))
            else:
                print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f" %
                      (test_cost, speed, acc))

            # NOTE(review): despite the "train_f" key, ``f`` is the value
            # returned by the dev evaluation above; the key is left
            # unchanged so existing log readers keep working.
            log_entry = {"iteration": idx, "train_f": f}
            log_file.write(json.dumps(log_entry) + "\n")
            if abs(f - previous_f) < data.stopping_criterion:
                iters_without_change += 1
            else:
                iters_without_change = 0

            if iters_without_change == data.iters_without_change:
                print(
                    f"Model f-measure has not changed in {iters_without_change} iterations. Stopping."
                )
                # Fixed: the message announced a stop but the loop carried on.
                break

            gc.collect()
            previous_f = f
예제 #9
0
def train(data):
    """Train a model on ``data`` and checkpoint it whenever the dev score improves.

    The function builds either a ``SentClassifier`` or a ``SeqLabel`` model
    (depending on ``data.sentence_classification``), creates the optimizer
    named by ``data.optimizer`` and runs ``data.HP_iteration`` epochs. After
    every epoch the model is evaluated on the dev split; the resulting score
    is taken from one of three sources:

    * EVALB bracketing F-measure when ``data.optimize_with_evalb`` is set,
    * the output of the ``data.conll_ud`` script when
      ``data.optimize_with_las`` is set,
    * otherwise the dev F-score (``data.seg``) or the dev accuracy.

    When the score exceeds the best one seen so far the model's state dict is
    saved to ``data.model_dir + ".model"`` (overwriting the previous best).
    The test split is evaluated and reported after every epoch as well.

    Args:
        data: Project configuration/data object. It is mutated: the training
            instances are reshuffled in place each epoch and
            ``data.decode_dir`` is filled in with a temporary file path when
            it is ``None`` and an external scorer is used.

    Returns:
        None. Results are printed and written to disk.
    """
    print("Training model...")
    data.show_data_summary()
    save_data_name = data.model_dir +".dset"
    data.save(save_data_name)
    if data.sentence_classification:
        model = SentClassifier(data)
    else:
        model = SeqLabel(data)
        print (model)

    # The optimizer name is matched case-insensitively everywhere in this
    # function, including the learning-rate decay check further down.
    optimizer_name = data.optimizer.lower()
    if optimizer_name == "sgd":
        optimizer = optim.SGD(model.parameters(), lr=data.HP_lr, momentum=data.HP_momentum,weight_decay=data.HP_l2)
    elif optimizer_name == "adagrad":
        optimizer = optim.Adagrad(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif optimizer_name == "adadelta":
        optimizer = optim.Adadelta(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif optimizer_name == "rmsprop":
        optimizer = optim.RMSprop(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    elif optimizer_name == "adam":
        optimizer = optim.Adam(model.parameters(), lr=data.HP_lr, weight_decay=data.HP_l2)
    else:
        print("Optimizer illegal: %s"%(data.optimizer))
        exit(1)
    # Sentinel lower than any score the evaluators are expected to produce.
    best_dev = -10

    ## start training
    for idx in range(data.HP_iteration):
        epoch_start = time.time()
        temp_start = epoch_start
        print("Epoch: %s/%s" %(idx,data.HP_iteration))
        # Fixed: the original compared against the literal "SGD", so a
        # lower-case "sgd" setting built an SGD optimizer but never decayed
        # its learning rate.
        if optimizer_name == "sgd":
            optimizer = lr_decay(optimizer, idx, data.HP_lr_decay, data.HP_lr)
        sample_loss = 0
        total_loss = 0
        right_token = 0
        whole_token = 0

        # Shuffle ids and raw texts together so they stay aligned.
        train_data = list(zip(data.train_Ids, data.train_texts))
        random.shuffle(train_data)
        data.train_Ids, data.train_texts = zip(*train_data)

        model.train()
        model.zero_grad()
        batch_size = data.HP_batch_size
        train_num = len(data.train_Ids)
        # "+1" covers the trailing partial batch; when train_num is an exact
        # multiple of batch_size the extra batch is empty and skipped below.
        total_batch = train_num//batch_size+1
        for batch_id in range(total_batch):
            start = batch_id*batch_size
            end = (batch_id+1)*batch_size
            if end >train_num:
                end = train_num
            instance = data.train_Ids[start:end]
            instance_texts = data.train_texts[start:end]

            if not instance:
                continue
            batch_word, batch_features, batch_wordlen, batch_wordrecover, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_word_text  = batchify_with_label(instance, instance_texts , data.HP_gpu, True, data.sentence_classification)
            loss, tag_seq = model.neg_log_likelihood_loss(batch_word, batch_features, batch_wordlen, batch_char, batch_charlen, batch_charrecover, batch_label, mask, batch_word_text)
            right, whole = predict_check(tag_seq, batch_label, mask, data.sentence_classification)
            right_token += right
            whole_token += whole
            sample_loss += loss.item()
            total_loss += loss.item()
            if end%500 == 0:
                temp_time = time.time()
                temp_cost = temp_time - temp_start
                temp_start = temp_time

                print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))
                if sample_loss > 1e8 or str(sample_loss) == "nan":
                    # NOTE: only reported; training deliberately continues.
                    print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")
                sys.stdout.flush()
                sample_loss = 0
            loss.backward()
            optimizer.step()
            model.zero_grad()
        temp_time = time.time()
        temp_cost = temp_time - temp_start
        print("     Instance: %s; Time: %.2fs; loss: %.4f; acc: %s/%s=%.4f"%(end, temp_cost, sample_loss, right_token, whole_token,(right_token+0.)/whole_token))

        epoch_finish = time.time()
        epoch_cost = epoch_finish - epoch_start
        print("Epoch: %s training finished. Time: %.2fs, speed: %.2fst/s,  total loss: %s"%(idx, epoch_cost, train_num/epoch_cost, total_loss))
        print("totalloss:", total_loss)
        if total_loss > 1e8 or str(total_loss) == "nan":
            # NOTE: only reported; training deliberately continues.
            print("ERROR: LOSS EXPLOSION (>1e8) ! PLEASE SET PROPER PARAMETERS AND STRUCTURE! EXIT....")

        speed, acc, p, r, f, pred_results, pred_scores = evaluate(data, model, "dev")
        dev_finish = time.time()
        dev_cost = dev_finish - epoch_finish

        if data.optimize_with_evalb:

            # delete=False: the file name is reused as the decode target and
            # as the prefix of the scorer's ".out" file after the handle closes.
            with tempfile.NamedTemporaryFile("w",delete=False) as f_decode:

                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                # Fixed: previously only bound when decode_dir was None, which
                # raised NameError for a pre-configured decode path.
                decoded_st_dir = data.decode_dir
                evalb_out_path = f_decode.name+".out"

                # presumably writes the dev predictions to data.decode_dir —
                # verify against write_decoded_results
                data.write_decoded_results(pred_results, 'dev')
                command = ["PYTHONPATH="+data.tree2labels,"python",
                            data.evaluate," --input ",decoded_st_dir," --gold ",data.gold_dev_trees," --evalb ",data.evalb,">",evalb_out_path]
                # Needs a shell: the command relies on an env-var prefix and
                # on ">" output redirection.
                os.system(" ".join(command))

            # Fixed: the scorer output is now read through a context manager
            # so the handle is closed every epoch.
            with open(evalb_out_path,"r") as f_evalb:
                evalb_lines = f_evalb.read().split("\n")
            current_score = float([l for l in evalb_lines
                                           if l.startswith("Bracketing FMeasure")][0].split("=")[1])
            print ("Current Score (from EVALB)", current_score, "Previous best dev (from EVALB)", best_dev)  

        elif data.optimize_with_las:

            with tempfile.NamedTemporaryFile("w",delete=False) as f_decode:
                if data.decode_dir is None:
                    data.decode_dir = f_decode.name
                # Fixed: see the EVALB branch — always bind the decode path.
                decoded_st_dir = data.decode_dir
                conll_out_path = f_decode.name+".out"

                data.write_decoded_results(pred_results, 'dev')
                #Transforming the output file into a CoNLL file
                command = ["python",
                           data.dep2labels+os.sep+"decode_output_file.py", 
                           "--input", 
                           decoded_st_dir,
                           "--output",
                           conll_out_path
                      ]

                # The handle is named "proc" so it no longer clobbers the
                # precision value "p" returned by evaluate().
                proc = Popen(" ".join(command),stdout=subprocess.PIPE, shell=True)
                out, err = proc.communicate()

                command = ["python",
                           data.conll_ud, conll_out_path, 
                           data.gold_dev_trees]

                proc = Popen(" ".join(command),stdout=subprocess.PIPE, shell=True)
                out, err = proc.communicate()
                out = out.decode("utf-8")

            # The score is the text after the first ":" in the script's stdout.
            current_score = float(out.strip().split(":")[1])
            print ("Current Score (from conll_ud)", current_score, "Previous best dev (from conll_ud)", best_dev)  

        else:
            if data.seg:
                current_score = f
                print("Dev: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(dev_cost, speed, acc, p, r, f))
            else:
                current_score = acc
                print("Dev: time: %.2fs speed: %.2fst/s; acc: %.4f"%(dev_cost, speed, acc))

        if current_score > best_dev:
            if data.seg:
                print("Exceed previous best f score:", best_dev)
            else:
                print("Exceed previous best acc score:", best_dev)
            # A single checkpoint file is kept; each new best overwrites it.
            model_name = data.model_dir + ".model"
            print("Save current best model in file:", model_name)
            torch.save(model.state_dict(), model_name)
            best_dev = current_score
        # ## decode test
        speed, acc, p, r, f, _,_ = evaluate(data, model, "test")
        test_finish = time.time()
        test_cost = test_finish - dev_finish
        if data.seg:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f, p: %.4f, r: %.4f, f: %.4f"%(test_cost, speed, acc, p, r, f))
        else:
            print("Test: time: %.2fs, speed: %.2fst/s; acc: %.4f"%(test_cost, speed, acc))
        gc.collect()