Example #1
def train(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          savestr,
          logfile=True):
    print_interval = 100
    val_interval = 1000
    save_interval = 1000
    target_dim = None
    rldmax_len = 500
    val_batch = 500
    running_loss_deque = deque(maxlen=rldmax_len)

    # erase the logfile
    if isinstance(logfile, str):
        open(logfile, 'w').close()

    for epoch in range(starting_epoch, total_epochs):
        # all these are batches
        for i in range(starting_iter, iter_per_epoch):
            train_step_loss = run_one_step(computer, train, optimizer,
                                           binary_criterion)
            if train_step_loss is not None:
                printloss = float(train_step_loss[0])
            else:
                raise ValueError("What is happening?")
                printloss = 10000
            # computer.new_sequence_reset()
            running_loss_deque.appendleft(printloss)
            if i % print_interval == 0:
                running_loss = np.mean(running_loss_deque)
                logprint(
                    logfile,
                    "learning.   count: %4d, training loss: %.10f, running loss: %.10f"
                    % (i, printloss, running_loss))

            if i % val_interval == 0:
                printloss = 0
                for _ in range(val_batch):
                    # NaN check: NaN is the only float that does not equal itself
                    assert printloss == printloss
                    val_loss = valid_one_step(computer, valid,
                                              binary_criterion)
                    if val_loss is not None:
                        printloss += float(val_loss[0])
                    else:
                        global failure
                        failure += 1
                printloss = printloss / val_batch
                logprint(
                    logfile, "validation. count: %4d, val loss     : %.10f" %
                    (i, printloss))

            if i % save_interval == 0:
                save_model(computer, optimizer, epoch, i, savestr)
                print("model saved for epoch", epoch, "input", i)
Example #2
def valid(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          savestr,
          logfile=False):
    """
    I have trouble comparing the performance of different models: their losses do not refer to the same quantity.
    Processing by sequences and processing by steps are fundamentally different, so the comparison is unfair.

    :param computer:
    :param optimizer:
    :param real_criterion:
    :param binary_criterion:
    :param train: this is the ChannelManager class. It has a __next__ method defined.
    :param valid: ditto
    :param starting_epoch:
    :param total_epochs:
    :param starting_iter:
    :param iter_per_epoch:
    :param savestr: a custom string that identifies this training run
    :param logfile:
    :return:
    """
    global global_exception_counter
    print_interval = 100
    val_interval = 10000
    save_interval = 10000
    target_dim = None
    rldmax_len = 500
    val_batch = 100000
    running_loss_deque = deque(maxlen=rldmax_len)
    computer.eval()

    val_losses = []
    for i in range(val_batch):
        val_loss = valid_one_step(computer, valid, binary_criterion)
        if val_loss is not None:
            printloss = float(val_loss[0])
            val_losses.append(printloss)
        else:
            raise ValueError("Why is val_loss None again?")
        if logfile:
            logprint(
                logfile, "validation. count: %4d, val loss     : %.10f" %
                (i, printloss))
        print("validation. count: %4d, loss: %.10f" % (i, printloss))
    print("loss:", np.mean(val_losses))
Example #3
def train(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid_dl,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          target_dim,
          savestr,
          beta,
          logfile=False,
          kill_time=True):

    valid_iterator = iter(valid_dl)
    print_interval = 100
    val_interval = 999
    save_interval = 5
    rldmax_len = 50
    val_batch = int(val_bat_cons / param_bs)
    running_cod_loss = deque(maxlen=rldmax_len)
    running_toe_loss = deque(maxlen=rldmax_len)
    traincms, validcms = ConfusionMatrixStats(
        target_dim - 1), ConfusionMatrixStats(target_dim - 1)
    cms = (traincms, validcms)

    if logfile:
        open(logfile, 'w').close()

    for name, param in computer.named_parameters():
        logprint(logfile, name)
        logprint(logfile, param.data.shape)

    for epoch in range(starting_epoch, total_epochs):
        for i, (input, target, loss_type) in enumerate(train):
            i = starting_iter + i
            if kill_time:
                out_of_time()

            if i % val_interval == 0:
                total_cod = 0
                total_toe = 0
                for _ in range(val_batch):
                    # draw into separate names so the training batch from
                    # enumerate(train) above is not clobbered
                    try:
                        (val_input, val_target, val_loss_type) = next(valid_iterator)
                    except StopIteration:
                        valid_iterator = iter(valid_dl)
                        (val_input, val_target, val_loss_type) = next(valid_iterator)
                    cod_loss, toe_loss = run_one_patient(computer,
                                                         val_input,
                                                         val_target,
                                                         optimizer,
                                                         val_loss_type,
                                                         real_criterion,
                                                         binary_criterion,
                                                         beta,
                                                         cms,
                                                         validate=True)
                    total_cod += cod_loss
                    total_toe += toe_loss
                # average over the validation batches so the printed loss is
                # on the same scale as a single batch
                total_cod = total_cod / val_batch
                total_toe = total_toe / val_batch
                logprint(
                    logfile,
                    "validation. cod: %.10f, toe: %.10f, total: %.10f" %
                    (total_cod, total_toe, total_cod + beta * total_toe))
                logprint(
                    logfile, "validate sen: %.6f, spe: %.6f, roc: %.6f" %
                    tuple(validcms.running_stats()))

            if i < iter_per_epoch:
                cod_loss, toe_loss = run_one_patient(computer, input, target,
                                                     optimizer, loss_type,
                                                     real_criterion,
                                                     binary_criterion, beta,
                                                     cms)
                total_loss = cod_loss + toe_loss
                running_cod_loss.appendleft(cod_loss)
                running_toe_loss.appendleft(toe_loss)
                if i % print_interval == 0:
                    running_cod = np.mean(running_cod_loss)
                    running_toe = np.mean(running_toe_loss)
                    logprint(
                        logfile,
                        "batch %4d. batch cod: %.5f, toe: %.5f, total: %.5f. running cod: %.5f, toe: %.5f, total: %.5f"
                        % (i, cod_loss, toe_loss, cod_loss + beta * toe_loss,
                           running_cod, running_toe,
                           running_cod + beta * running_toe))
                    logprint(
                        logfile, "train sen: %.6f, spe: %.6f, roc: %.6f" %
                        tuple(traincms.running_stats()))
            else:
                break

        if epoch % save_interval == 0:
            save_model(computer, optimizer, epoch, i, savestr)
            print("model saved for epoch", epoch, "input", i)
        starting_iter = 0
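Examples #3 through #6 all repeat the same try/except StopIteration dance to draw endlessly from valid_dl. That pattern factors into a small generator; a minimal sketch (cycle_loader is a hypothetical name, not part of the original code):

def cycle_loader(dataloader):
    # Yield batches forever, restarting the underlying iterator whenever
    # the DataLoader is exhausted.
    while True:
        for batch in dataloader:
            yield batch


# Usage, replacing the try/except block in the examples above:
#   valid_iterator = cycle_loader(valid_dl)
#   (input, target, loss_type) = next(valid_iterator)

Unlike itertools.cycle, which caches and replays the first pass, this generator re-enters the DataLoader on each pass, so shuffling still takes effect.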
Example #4
def train(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid_dl,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          savestr,
          logfile=False):
    valid_iterator = iter(valid_dl)
    print_interval = 10
    val_interval = 200
    save_interval = 800
    target_dim = None
    rldmax_len = 50
    val_batch = 100
    running_loss_deque = deque(maxlen=rldmax_len)
    if logfile:
        open(logfile, 'w').close()

    for epoch in range(starting_epoch, total_epochs):
        for i, (input, target, loss_type) in enumerate(train):
            i = starting_iter + i
            if target_dim is None:
                target_dim = target.shape[2]

            if i < iter_per_epoch:
                train_story_loss = run_one_patient(computer, input, target,
                                                   target_dim, optimizer,
                                                   loss_type, real_criterion,
                                                   binary_criterion)
                if train_story_loss is not None:
                    printloss = float(train_story_loss[0])
                else:
                    raise ValueError("Why would story loss be None?")
                running_loss_deque.appendleft(printloss)
                if i % print_interval == 0:
                    running_loss = np.mean(running_loss_deque)
                    logprint(
                        logfile,
                        "learning.   count: %4d, training loss: %.10f, running loss: %.10f"
                        % (i, printloss, running_loss))

                if i % val_interval == 0:
                    printloss = 0
                    for _ in range(val_batch):
                        # we should consider running validation multiple times and average. TODO
                        try:
                            (input, target, loss_type) = next(valid_iterator)
                        except StopIteration:
                            valid_iterator = iter(valid_dl)
                            (input, target, loss_type) = next(valid_iterator)

                        val_loss = run_one_patient(computer,
                                                   input,
                                                   target,
                                                   target_dim,
                                                   optimizer,
                                                   loss_type,
                                                   real_criterion,
                                                   binary_criterion,
                                                   validate=True)
                        if val_loss is not None:
                            printloss += float(val_loss[0])
                        else:
                            raise ValueError("Investigate this")
                    printloss = printloss / val_batch
                    logprint(
                        logfile,
                        "validation. count: %4d, val loss     : %.10f" %
                        (i, printloss))

                if i % save_interval == 0:
                    save_model(computer, optimizer, epoch, i, savestr)
                    print("model saved for epoch", epoch, "input", i)
            else:
                break
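save_model is called in every example but never shown. A plausible minimal version using standard PyTorch checkpointing; the dict layout and filename scheme here are illustrative assumptions, not the original:

import torch


def save_model(computer, optimizer, epoch, iteration, savestr):
    # Bundle model and optimizer state so training can resume at
    # (epoch, iteration); savestr tags the run, as the docstring in
    # Example #2 describes.
    checkpoint = {
        "epoch": epoch,
        "iteration": iteration,
        "model_state_dict": computer.state_dict(),
        "optimizer_state_dict": optimizer.state_dict(),
    }
    torch.save(checkpoint, "%s_epoch%d_iter%d.pkl" % (savestr, epoch, iteration))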
Example #5
def train(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid_dl,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          savestr,
          beta,
          logfile=False,
          kill_time=True):
    # valid_iterator = iter(valid_dl)
    print_interval = 100
    val_interval = 500
    save_interval = int(32768 / param_bs)
    target_dim = None
    rldmax_len = 50
    val_batch = int(val_bat_cons / param_bs)
    running_cod_loss = deque(maxlen=rldmax_len)
    running_toe_loss = deque(maxlen=rldmax_len)
    if logfile:
        open(logfile, 'w').close()

    for name, param in computer.named_parameters():
        logprint(logfile, name)
        logprint(logfile, param.data.shape)

    for epoch in range(starting_epoch, total_epochs):
        for i, (input, target, loss_type) in enumerate(train):
            i = starting_iter + i
            if kill_time:
                out_of_time()

            if target_dim is None:
                target_dim = target.shape[1]

            if i < iter_per_epoch:
                cod_loss, toe_loss = run_one_patient(computer, input, target,
                                                     optimizer, loss_type,
                                                     real_criterion,
                                                     binary_criterion, beta)
                total_loss = cod_loss + toe_loss
                running_cod_loss.appendleft(cod_loss)
                running_toe_loss.appendleft(toe_loss)
                if i % print_interval == 0:
                    running_cod = np.mean(running_cod_loss)
                    running_toe = np.mean(running_toe_loss)
                    logprint(
                        logfile,
                        "batch %4d. batch cod: %.5f, toe: %.5f, total: %.5f. running cod: %.5f, toe: %.5f, total: %.5f"
                        % (i, cod_loss, toe_loss, cod_loss + beta * toe_loss,
                           running_cod, running_toe,
                           running_cod + beta * running_toe))
                #
                # if i % val_interval == 0:
                #     total_cod=0
                #     total_toe=0
                #     total_sen=0
                #     total_spe=0
                #     total_prec=0
                #     total_reca=0
                #     total_f1=0
                #     total_accu=0
                #     total_roc=0
                #     for _ in range(val_batch):
                #         # we should consider running validation multiple times and average. TODO
                #         try:
                #             (input, target, loss_type) = next(valid_iterator)
                #         except StopIteration:
                #             valid_iterator = iter(valid_dl)
                #             (input, target, loss_type) = next(valid_iterator)
                #
                #         cod_loss, toe_loss, sen, spe, prec, reca, f1, accu, roc \
                #             = run_one_patient(computer, input, target, optimizer, loss_type,
                #                               real_criterion, binary_criterion, beta, validate=True)
                #         total_cod+=cod_loss
                #         total_toe+=toe_loss
                #         total_sen+=sen
                #         total_spe+=spe
                #         total_prec+=prec
                #         total_reca+=reca
                #         total_f1+=f1
                #         total_accu+=accu
                #         total_roc+=roc
                #     total_cod=total_cod/val_batch
                #     total_toe=total_toe/val_batch
                #     total_sen=total_sen/val_batch
                #     total_spe=total_spe/val_batch
                #     total_prec=total_prec/val_batch
                #     total_reca=total_reca/val_batch
                #     total_f1=total_f1/val_batch
                #     total_accu=total_accu/val_batch
                #     total_roc=total_roc/val_batch
                #     assert(total_cod>0)
                #     assert(total_toe>0)
                #     assert(total_sen>0)
                #     assert(total_spe>0)
                #     assert(total_prec>0)
                #     assert(total_reca>0)
                #     assert(total_f1>0)
                #     assert(total_accu>0)
                #     assert(total_roc>0)
                #     # TODO this validation is not printing correctly. Way too big.
                #     logprint(logfile, "validation. cod: %.10f, toe: %.10f, total: %.10f" %
                #              (total_cod, total_toe, total_cod + beta*total_toe))
                #     logprint(logfile, "sen: %.6f, spe: %.6f, prec: %.6f, recall: %.6f, f1: %.6f, accu: %.6f, roc: %.6f" %
                #              (total_sen, total_spe, total_prec, total_reca, total_f1, total_accu, total_roc))
                #

                if i % save_interval == 0:
                    save_model(computer, optimizer, epoch, i, savestr)
                    print("model saved for epoch", epoch, "input", i)
            else:
                break
        starting_iter = 0
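out_of_time, guarded by the kill_time flag in Examples #3 and #5, is also project-internal. A guess at its intent, judging from the name: exit cleanly once a wall-clock budget is spent, so the last checkpoint survives a scheduler kill. Everything below (the budget, the module-level start time) is assumed:

import sys
import time

_START = time.time()
_BUDGET_SECONDS = 3600 * 23  # e.g. stay under a 24-hour cluster job limit


def out_of_time():
    # Exit before the scheduler kills the job; save_model has already run
    # at the most recent save_interval, so that checkpoint is preserved.
    if time.time() - _START > _BUDGET_SECONDS:
        print("out of time, exiting")
        sys.exit(0)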
Example #6
def train(computer,
          optimizer,
          real_criterion,
          binary_criterion,
          train,
          valid_dl,
          starting_epoch,
          total_epochs,
          starting_iter,
          iter_per_epoch,
          savestr,
          beta,
          logfile=False):
    valid_iterator = iter(valid_dl)
    print_interval = 10
    val_interval = 400
    save_interval = int(8000 / param_bs)
    target_dim = None
    rldmax_len = 50
    val_batch = int(val_bat_cons / param_bs)
    running_cod_loss = deque(maxlen=rldmax_len)
    running_toe_loss = deque(maxlen=rldmax_len)
    if logfile:
        open(logfile, 'w').close()

    for name, param in computer.named_parameters():
        logprint(logfile, name)
        logprint(logfile, param.data.shape)

    for epoch in range(starting_epoch, total_epochs):
        for i, (input, target, loss_type) in enumerate(train):
            i = starting_iter + i
            out_of_time()

            if target_dim is None:
                target_dim = target.shape[1]

            if i < iter_per_epoch:
                cod_loss, toe_loss = run_one_patient(computer, input, target,
                                                     optimizer, loss_type,
                                                     real_criterion,
                                                     binary_criterion, beta)
                total_loss = cod_loss + toe_loss
                running_cod_loss.appendleft(cod_loss)
                running_toe_loss.appendleft(toe_loss)
                if i % print_interval == 0:
                    running_cod = np.mean(running_cod_loss)
                    running_toe = np.mean(running_toe_loss)
                    logprint(
                        logfile,
                        "batch %4d. batch cod: %.5f, toe: %.5f, total: %.5f. running cod: %.5f, toe: %.5f, total: %.5f"
                        % (i, cod_loss, toe_loss, cod_loss + beta * toe_loss,
                           running_cod, running_toe,
                           running_cod + beta * running_toe))

                if i % val_interval == 0:
                    total_cod = 0
                    total_toe = 0
                    for _ in range(val_batch):
                        # we should consider running validation multiple times and average. TODO
                        try:
                            (input, target, loss_type) = next(valid_iterator)
                        except StopIteration:
                            valid_iterator = iter(valid_dl)
                            (input, target, loss_type) = next(valid_iterator)

                        cod_loss, toe_loss = run_one_patient(computer,
                                                             input,
                                                             target,
                                                             optimizer,
                                                             loss_type,
                                                             real_criterion,
                                                             binary_criterion,
                                                             beta,
                                                             validate=True)
                        total_cod += cod_loss
                        total_toe += toe_loss
                    total_cod = total_cod / val_batch
                    total_toe = total_toe / val_batch
                    # TODO this validation is not printing correctly. Way too big.
                    logprint(
                        logfile,
                        "validation. cod: %.10f, toe: %.10f, total: %.10f" %
                        (total_cod, total_toe, total_cod + beta * total_toe))

                if i % save_interval == 0:
                    save_model(computer, optimizer, epoch, i, savestr)
                    print("model saved for epoch", epoch, "input", i)
            else:
                break
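For completeness, a hypothetical call site for Example #6. param_bs and val_bat_cons are module-level constants in the original; the values, criteria, and helper names below are illustrative only:

# Illustrative constants; the originals live at module level.
param_bs = 64          # batch size
val_bat_cons = 6400    # total validation samples per validation pass

computer = build_model()  # hypothetical model factory
optimizer = torch.optim.Adam(computer.parameters())

train(computer, optimizer,
      real_criterion=torch.nn.SmoothL1Loss(),
      binary_criterion=torch.nn.BCEWithLogitsLoss(),
      train=train_dl, valid_dl=valid_dl,      # hypothetical DataLoaders
      starting_epoch=0, total_epochs=10,
      starting_iter=0, iter_per_epoch=len(train_dl),
      savestr="baseline", beta=0.01,
      logfile="train.log")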