예제 #1
0
def run(model, exp, terms, save_freq=5, data=None):
    """Train ``model`` one epoch at a time and record per-epoch results on ``exp``.

    Training continues for as long as ``Validator(exp, terms).check()`` is
    truthy.  After every epoch the train/test loss and accuracy, the
    significance, the per-cell counts and the output distribution are appended
    to ``exp.results``.  The model is saved every ``save_freq`` epochs and once
    more when training ends.

    Args:
        model: object providing ``summary()``, ``train_on_batch()`` and
            ``evaluate_generator()``.
        exp: experiment record; ``dataset``, ``description``, ``coordinates``
            and ``batch_size`` are read, ``results``, ``end_date_time`` and
            ``total_time`` are written.
        terms: passed through unchanged to ``Validator``.
        save_freq: save the model whenever the number of recorded epochs is a
            multiple of this value.
        data: optional ``(x_train, y_train, x_test, y_test)`` tuple.  When
            given, the inputs are passed through ``tr.transform``; otherwise
            the ``'/transformed'`` dataset is loaded from disk.
    """
    exp_dir = ds.get_path_to_dataset(pb.Experiment.Dataset.Name(exp.dataset))
    save_dir = os.path.join(exp_dir, exp.description)
    exp_file_name = exp.description + '.exp'

    # The file handle only exists when the data is read from disk; keeping it
    # as None otherwise avoids a NameError at clean-up time.
    h_file = None
    if data:
        x_train, y_train, x_test, y_test = data
        x_train, x_test = tr.transform(x_train, x_test)
    else:
        h_file, (x_train, y_train, x_test, y_test) = ds.load_dataset(
            pb.Experiment.Dataset.Name(exp.dataset),
            exp.coordinates + '/transformed')
    # Rebuild the tuple in both branches so the statistics helpers see the
    # same (transformed) arrays the model is trained on.
    data = x_train, y_train, x_test, y_test

    try:
        batch_size = exp.batch_size
        # float() forces true division even where "/" is integer division,
        # so a trailing partial batch is never dropped.
        num_batches = int(ceil(x_train.shape[0] / float(batch_size)))
        num_test_batches = int(ceil(x_test.shape[0] / float(batch_size)))

        def batches(x, y, count):
            # Lazily yields `count` consecutive (inputs, targets) slices.
            return ((x[i * batch_size:(i + 1) * batch_size],
                     y[i * batch_size:(i + 1) * batch_size])
                    for i in xrange(count))

        valid = Validator(exp, terms)

        epoch_times = np.array([])
        valid._clock = clock()
        model.summary()
        while valid.check():
            epoch_start = clock()
            if valid._num_epochs:
                print("Epoch {}/{}".format(valid.epochs + 1,
                                           valid._num_epochs))
            else:
                print("Epoch {}".format(valid.epochs + 1))

            batch_eta = 0
            batch_times = np.array([])
            for b in xrange(num_batches):
                batch_start = clock()
                # Update progress bar
                progress(b, num_batches, batch_size, batch_eta)
                # Train on a batch
                lo = b * batch_size
                hi = lo + batch_size
                model.train_on_batch(x_train[lo:hi, :], y_train[lo:hi, :])
                batch_times = np.append(batch_times, clock() - batch_start)
                # ETA for the rest of the epoch from the median batch time
                batch_eta = np.median(batch_times) * (num_batches - b - 1)
            # Finish progress bar
            progress(num_batches,
                     num_batches,
                     batch_size,
                     0,
                     end='\n',
                     time=clock() - epoch_start)

            # Calculate stats and add the epoch results to the experiment
            epoch = exp.results.add()
            print("Evaluating Train")
            epoch.train_loss, epoch.train_accuracy = model.evaluate_generator(
                batches(x_train, y_train, num_batches),
                num_batches,
                max_q_size=min((num_batches // 2, 10)))
            print("Evaluating Test")
            epoch.test_loss, epoch.test_accuracy = model.evaluate_generator(
                batches(x_test, y_test, num_test_batches),
                num_test_batches,
                max_q_size=min((num_test_batches // 2, 10)))
            print("Calculating Sig")
            epoch.s_b = st.significance(model, data)
            # NOTE(review): the AUC computation is disabled in this variant,
            # yet epoch.auc is still printed below -- confirm that is intended.
            for row in st.num_of_each_cell(model, data):
                epoch.matrix.add().columns.extend(row)
            print("Making CFM")
            matrix = st.confusion_matrix(model, data, offset='\t ')
            # The epoch time deliberately excludes the output distribution
            # step below, as in the original ordering.
            epoch.num_seconds = clock() - epoch_start
            print("Getting output")
            output = st.get_output_distro(model, data)
            epoch.output.background.extend(output["background"])
            epoch.output.signal.extend(output["signal"])

            # Print statistics
            print("\t Train Accuracy: {:.3f}\tTest Accuracy: {:.3f}".format(
                epoch.train_accuracy, epoch.test_accuracy))
            if valid.update_w():
                print("\t Slope: {:.5f} (test_accuracy / second)".format(
                    valid.slope))
            print("\t Time this epoch: {:.2f}s".format(epoch.num_seconds),
                  end='')
            if valid._num_epochs:
                epoch_times = np.append(epoch_times, epoch.num_seconds)
                remaining = valid._num_epochs - valid.epochs
                print("\tFinal ETA: {}".format(
                    convert_seconds(np.median(epoch_times) * remaining)))
            else:
                print()
            print("\t Significance (S/sqrt(B)): {:.2f}".format(epoch.s_b))
            print("\t Area Under the Curve (efficiency): {:.3f}".format(
                epoch.auc))
            print(matrix)

            # Saves the model
            if (len(exp.results) % save_freq) == 0:
                save(model, exp, save_dir, exp_file_name)
                print("\t ", end='')
            sys.stdout.flush()

        exp.end_date_time = str(datetime.datetime.now())
        exp.total_time = valid.time

        print("\n" + valid.failed)
        print("Total Time: {}".format(convert_seconds(valid.time)))

        save(model, exp, save_dir, exp_file_name)
        print("\t ", end='')
    finally:
        # Release the dataset file even if training raised part-way through.
        if h_file is not None:
            h_file.close()
예제 #2
0
        cutoff = 1-i*(1/datapoints)
        e_b[i], e_s[i] = efficiencies(model, data, cutoff)[:,1]
        if experiment_epoch:
            point = experiment_epoch.curve.add()
            point.signal = e_s[i]
            point.background = e_b[i]
            point.cutoff = cutoff
    if save:
        plt.plot(e_b, e_s)
        plt.title("Efficiency Curve")
        plt.ylabel("Signal Efficiency")
        plt.xlabel("Background Inefficiency")
        plt.savefig(save, format="png")
    return trapz(e_s,e_b)

# ""
def confusion_matrix(model, data, offset='', **kwargs):
    """Return the ``MATRIX`` template filled in with scaled efficiencies.

    The result of ``efficiencies(model, data, **kwargs)`` is multiplied by 100
    and flattened; ``offset`` is the first format argument and the flattened
    values follow it.
    """
    scaled_cells = (efficiencies(model, data, **kwargs) * 100).flatten()
    return MATRIX.format(offset, *scaled_cells)


if __name__ == "__main__":
    # Ad-hoc check: load a saved model and print each statistic for it.
    from deep_learning.trainNN import load_model
    model = load_model("ttHLep/U_Optimal")
    # NOTE(review): assumes ds.load_dataset returns the four arrays directly
    # -- confirm against its current return value.
    x_train, y_train, x_test, y_test = ds.load_dataset("ttHLep", "Unsorted")
    x_train, x_test = tr.transform(x_train, x_test)
    data = (x_train, y_train, x_test, y_test)
    # Parenthesised single-argument prints behave the same as a print
    # statement and as the print function.
    print(significance(model, data))
    print(AUC(model, data))
    print(confusion_matrix(model, data))
    print(confusion_matrix(model, data, over_rows=False))
예제 #3
0
def run(model, exp, terms, save_freq=5, data=None):
    """Train ``model`` one epoch at a time and record per-epoch results on ``exp``.

    Training continues for as long as ``Validator(exp, terms).check()`` is
    truthy.  After every epoch the train/test loss and accuracy, the
    significance, the AUC and the per-cell counts are appended to
    ``exp.results``.  The model is saved every ``save_freq`` epochs and once
    more (with ``graph=True``) when training ends.

    Args:
        model: object providing ``summary()``, ``train_on_batch()`` and
            ``evaluate_generator()``.
        exp: experiment record; ``dataset``, ``description``, ``coordinates``
            and ``batch_size`` are read, ``results``, ``end_date_time`` and
            ``total_time`` are written.
        terms: passed through unchanged to ``Validator``.
        save_freq: save the model whenever the number of recorded epochs is a
            multiple of this value.
        data: optional ``(x_train, y_train, x_test, y_test)`` tuple; when
            omitted the dataset is loaded from disk.  Either way the inputs
            are passed through ``tr.transform``.
    """
    exp_dir = ds.get_path_to_dataset(pb.Experiment.Dataset.Name(exp.dataset))
    save_dir = os.path.join(exp_dir, exp.description)
    exp_file_name = exp.description + '.exp'

    # The file handle only exists when the data is read from disk; keeping it
    # as None otherwise avoids a NameError at clean-up time.
    h_file = None
    try:
        if data:
            x_train, y_train, x_test, y_test = data
        else:
            h_file, (x_train, y_train, x_test, y_test) = ds.load_dataset(
                pb.Experiment.Dataset.Name(exp.dataset), exp.coordinates)
        x_train, x_test = tr.transform(x_train, x_test)
        # Rebuild the tuple in both branches so the statistics helpers see
        # the same (transformed) arrays the model is trained on.
        data = x_train, y_train, x_test, y_test

        batch_size = exp.batch_size
        # float() forces true division even where "/" is integer division,
        # so a trailing partial batch is never dropped.
        num_batches = int(ceil(x_train.shape[0] / float(batch_size)))
        num_test_batches = int(ceil(x_test.shape[0] / float(batch_size)))

        def batches(x, y, count):
            # Lazily yields `count` consecutive (inputs, targets) slices.
            return ((x[i * batch_size:(i + 1) * batch_size],
                     y[i * batch_size:(i + 1) * batch_size])
                    for i in xrange(count))

        valid = Validator(exp, terms)

        epoch_times = np.array([])
        valid._clock = clock()
        model.summary()
        while valid.check():
            epoch_start = clock()
            if valid._num_epochs:
                print("Epoch {}/{}".format(valid.epochs+1, valid._num_epochs))
            else:
                print("Epoch {}".format(valid.epochs+1))

            batch_eta = 0
            batch_times = np.array([])
            for b in xrange(num_batches):
                batch_start = clock()
                # Update progress bar
                progress(b, num_batches, batch_size, batch_eta)
                # Train on a batch
                lo = b * batch_size
                hi = lo + batch_size
                model.train_on_batch(x_train[lo:hi, :], y_train[lo:hi, :])
                batch_times = np.append(batch_times, clock() - batch_start)
                # ETA for the rest of the epoch from the median batch time
                batch_eta = np.median(batch_times) * (num_batches - b - 1)
            # Finish progress bar
            progress(num_batches, num_batches, batch_size, 0, end='\n')

            # Calculate stats and add the epoch results to the experiment.
            # Each split is evaluated over its OWN batch count (the counts
            # were previously swapped between train and test).
            epoch = exp.results.add()
            epoch.train_loss, epoch.train_accuracy = model.evaluate_generator(
                batches(x_train, y_train, num_batches), num_batches)
            epoch.test_loss, epoch.test_accuracy = model.evaluate_generator(
                batches(x_test, y_test, num_test_batches), num_test_batches)
            epoch.s_b = st.significance(model, data)
            epoch.auc = st.AUC(model, data, experiment_epoch=epoch)
            for row in st.num_of_each_cell(model, data):
                epoch.matrix.add().columns.extend(row)
            matrix = st.confusion_matrix(model, data, offset='\t ')
            epoch.num_seconds = clock() - epoch_start

            # Print statistics
            print("\t Train Accuracy: {:.3f}\tTest Accuracy: {:.3f}".format(
                epoch.train_accuracy, epoch.test_accuracy))
            if valid.update_w():
                print("\t Slope: {:.5f} (test_accuracy / second)".format(
                    valid.slope))
            print("\t Time this epoch: {:.2f}s".format(epoch.num_seconds),
                  end='')
            if valid._num_epochs:
                epoch_times = np.append(epoch_times, epoch.num_seconds)
                remaining = valid._num_epochs - valid.epochs
                print("\tFinal ETA: {}".format(
                    convert_seconds(np.median(epoch_times) * remaining)))
            else:
                print()
            print("\t Significance (S/sqrt(B)): {:.2f}".format(epoch.s_b))
            print("\t Area Under the Curve (efficiency): {:.3f}".format(
                epoch.auc))
            print(matrix)

            if (len(exp.results) % save_freq) == 0:
                save(model, exp, save_dir, exp_file_name)
                print("\t Saved the model\n")
            sys.stdout.flush()

        exp.end_date_time = str(datetime.datetime.now())
        exp.total_time = valid.time

        print("\n"+valid.failed)
        print("Total Time: {}".format(convert_seconds(valid.time)))

        save(model, exp, save_dir, exp_file_name, graph=True)
    finally:
        # Release the dataset file even if training raised part-way through.
        if h_file is not None:
            h_file.close()