# Example #1
def main():
    """Train a classifier on jet data, then evaluate its best checkpoints.

    Workflow: parse CLI arguments, prepare the log directory tree, load the
    train/validation/test data iterators, build and compile the Keras model,
    train with ``fit_generator``, and finally evaluate every "good" checkpoint
    found on disk.  All settings and results are recorded through ``Config``.
    """
    parser = argparse.ArgumentParser()

    # Default log dir is timestamped so repeated runs never collide.
    parser.add_argument("--logdir",
                        dest="log_dir",
                        type=str,
                        default="./logs/untitled-{}".format(
                            datetime.now().strftime("%y%m%d-%H%M%S")))

    parser.add_argument("--num_gpus",
                        default=len(get_available_gpus()),
                        type=int)
    parser.add_argument("--multi-gpu",
                        default=False,
                        action='store_true',
                        dest='multi_gpu')

    # Hyperparameters
    parser.add_argument("--epoch", dest="num_epochs", default=100, type=int)
    parser.add_argument("--batch_size", default=128, type=int)
    parser.add_argument("--valid_batch_size", default=1024, type=int)

    parser.add_argument("--lr", default=0.001, type=float)
    parser.add_argument(
        "--clipnorm",
        default=-1,
        type=float,
        help="if it is greater than 0, then graidient clipping is activated")
    parser.add_argument("--clipvalue", default=-1, type=float)
    parser.add_argument("--use-class-weight",
                        dest="use_class_weight",
                        default=False,
                        action="store_true")

    # Frequencies
    parser.add_argument("--valid_freq", type=int, default=32)
    parser.add_argument("--save_freq", type=int, default=32)
    parser.add_argument("-v", "--verbose", action="store_true")

    # Project parameters
    parser.add_argument("--min-pt", dest="min_pt", default=100, type=int)

    args = parser.parse_args()

    ###################
    # Log directory & config
    ###################
    log_dir = Directory(path=args.log_dir)
    log_dir.mkdir("script")
    log_dir.mkdir("model_checkpoint")
    log_dir.mkdir("learning_curve")
    log_dir.mkdir("roc_curve")
    log_dir.mkdir("model_response")

    backup_scripts(log_dir.script.path)

    config = Config(log_dir.path, "w")
    config.append(args)
    # HOSTNAME is not guaranteed to be exported (e.g. some shells / cron);
    # fall back instead of raising KeyError.
    config["hostname"] = os.environ.get("HOSTNAME", "unknown")

    ########################################
    # Load training and validation datasets
    ########################################
    dset = get_dataset_paths(args.min_pt)
    config.append(dset)

    config["seq_maxlen"] = {"x": 30}

    train_iter = get_data_iter(path=dset["training"],
                               batch_size=args.batch_size,
                               seq_maxlen=config.seq_maxlen,
                               fit_generator_mode=True)

    valid_iter = get_data_iter(path=dset["validation"],
                               batch_size=args.valid_batch_size,
                               seq_maxlen=config.seq_maxlen,
                               fit_generator_mode=True)

    # Test iterator yields plain batches (not (x, y) fit_generator tuples).
    test_iter = get_data_iter(path=dset["test"],
                              batch_size=args.valid_batch_size,
                              seq_maxlen=config.seq_maxlen,
                              fit_generator_mode=False)

    if args.use_class_weight:
        class_weight = get_class_weight(train_iter)
        config["class_weight"] = list(class_weight)
    else:
        class_weight = None

    #################################
    # Build & Compile a model.
    #################################
    x_shape = train_iter.get_shape("x", batch_shape=False)

    # Keep a handle on the single-device model (``_model``): it is the one
    # whose config is recorded, and the one checkpoints should come from.
    _model = build_a_model(x_shape=x_shape)

    config["model"] = _model.get_config()

    if args.multi_gpu:
        # FIX: the original referenced an undefined ``_model`` name here.
        model = multi_gpu_model(_model, gpus=args.num_gpus)
    else:
        model = _model

    model_plot_path = log_dir.concat("model.png")
    plot_model(model, to_file=model_plot_path, show_shapes=True)

    loss = 'categorical_crossentropy'

    # TODO capsulisation
    # Gradient clipping is opt-in: negative defaults mean "disabled".
    optimizer_kwargs = {}
    if args.clipnorm > 0:
        optimizer_kwargs["clipnorm"] = args.clipnorm
    if args.clipvalue > 0:
        optimizer_kwargs["clipvalue"] = args.clipvalue
    optimizer = optimizers.Adam(lr=args.lr, **optimizer_kwargs)

    metric_list = ["accuracy", roc_auc]

    model.compile(loss=loss, optimizer=optimizer, metrics=metric_list)

    config["loss"] = loss
    config["optimizer"] = "Adam"
    config["optimizer_config"] = optimizer.get_config()

    ###########################################################################
    # Callbacks
    ###########################################################################
    ckpt_format_str = "weights_epoch-{epoch:02d}_loss-{val_loss:.4f}_acc-{val_acc:.4f}_auc-{val_roc_auc:.4f}.hdf5"
    ckpt_path = log_dir.model_checkpoint.concat(ckpt_format_str)
    csv_log_path = log_dir.concat("log_file.csv")

    learning_curve = LearningCurve(directory=log_dir.learning_curve.path)
    learning_curve.book(x="step", y="roc_auc", best="max")
    learning_curve.book(x="step", y="acc", best="max")
    learning_curve.book(x="step", y="loss", best="min")

    callback_list = [
        callbacks.ModelCheckpoint(filepath=ckpt_path),
        callbacks.EarlyStopping(monitor="val_loss", patience=5),
        callbacks.ReduceLROnPlateau(),
        callbacks.CSVLogger(csv_log_path),
        learning_curve,
    ]

    ############################################################################
    # Training
    ############################################################################
    # FIX: honor the --epoch argument instead of a hard-coded 50 epochs.
    model.fit_generator(train_iter,
                        steps_per_epoch=len(train_iter),
                        epochs=args.num_epochs,
                        validation_data=valid_iter,
                        validation_steps=len(valid_iter),
                        callbacks=callback_list,
                        shuffle=True,
                        class_weight=class_weight)

    print("Training is over! :D")

    del model

    ###########################################
    # Evaluation
    ############################################
    # Switch the training iterator into plain evaluation mode.
    train_iter.fit_generator_mode = False
    train_iter.cycle = False

    good_ckpt = find_good_checkpoint(log_dir.model_checkpoint.path,
                                     which={
                                         "max": ["auc", "acc"],
                                         "min": ["loss"]
                                     })

    for idx, each in enumerate(good_ckpt, 1):
        print("[{}/{}] {}".format(idx, len(good_ckpt), each))

        # Clear the TF graph between checkpoints to avoid memory growth.
        K.clear_session()
        evaluate(custom_objects={"roc_auc": roc_auc},
                 checkpoint_path=each,
                 train_iter=train_iter,
                 test_iter=test_iter,
                 log_dir=log_dir)

    config.save()
# Example #2
def train():
    """Train a quark/gluon jet classifier with manual step-wise validation.

    Parses CLI arguments, prepares the log directory, builds and compiles the
    model, then runs a hand-rolled training loop that periodically validates
    on dijet and Z+jet samples, records metrics through ``Meter`` and saves
    checkpoints.  Returns the log directory object.
    """
    parser = argparse.ArgumentParser()

    parser.add_argument("--datasets_dir",
                        default="/store/slowmoyang/QGJets/data/root_100_200/2-Refined/",
                        type=str)

    parser.add_argument("--log_dir", default="./logs/{name}", type=str)
    parser.add_argument("--num_gpus", default=len(get_available_gpus()), type=int)
    parser.add_argument("--multi-gpu", default=False, action='store_true', dest='multi_gpu')

    # Hyperparameters
    parser.add_argument("--num_epochs", default=50, type=int)
    parser.add_argument("--batch_size", default=128, type=int)
    parser.add_argument("--valid_batch_size", default=1024, type=int)
    parser.add_argument("--lr", default=0.001, type=float)

    # Frequencies
    parser.add_argument("--valid_freq", type=int, default=32)
    parser.add_argument("--save_freq", type=int, default=32)
    parser.add_argument("-v", "--verbose", action="store_true")

    # Project parameters

    args = parser.parse_args()

    #########################################################
    # Log directory
    #######################################################
    # The default "--log_dir" contains a "{name}" placeholder; fill it with a
    # timestamped run name so repeated runs never collide.
    if '{name}' in args.log_dir:
        args.log_dir = args.log_dir.format(
            name="Untitled_{}".format(
                datetime.today().strftime("%Y-%m-%d_%H-%M-%S")))
    log_dir = get_log_dir(path=args.log_dir, creation=True)

    # Config
    config = Config(dpath=log_dir.path, mode="WRITE")
    config.update(args)

    dataset_paths = get_dataset_paths(config.datasets_dir)
    # FIX: dict.iteritems() is Python-2-only; .items() behaves the same in
    # both Python 2 and 3.
    for key, value in dataset_paths.items():
        print("{}: {}".format(key, value))
    config.update(dataset_paths)

    ########################################
    # Load training and validation datasets
    ########################################
    config["seq_maxlen"] = {"x_kinematics": 40, "x_pid": 40}

    train_iter = get_data_iter(
        path=config.dijet_training_set,
        seq_maxlen=config.seq_maxlen,
        batch_size=config.batch_size)

    # Validation iterators are cyclic so that ``.next()`` can be called an
    # unbounded number of times during training.
    valid_dijet_iter = get_data_iter(
        path=config.dijet_validation_set,
        seq_maxlen=config.seq_maxlen,
        batch_size=config.valid_batch_size,
        cyclic=True)

    valid_zjet_iter = get_data_iter(
        path=config.zjet_validation_set,
        seq_maxlen=config.seq_maxlen,
        batch_size=config.valid_batch_size,
        cyclic=True)

    steps_per_epoch = len(train_iter)
    total_step = config.num_epochs * steps_per_epoch
    if config.verbose:
        print("# of steps per one epoch: {}".format(steps_per_epoch))
        print("Total step: {}".format(total_step))


    #################################
    # Build & Compile a model.
    #################################
    x_kinematics_shape = train_iter.get_shape("x_kinematics", batch_shape=False)
    x_pid_shape = train_iter.get_shape("x_pid", batch_shape=False)

    # ``_model`` is the single-device model; checkpoints are always saved
    # from it, even when training runs on the multi-GPU replica.
    _model = build_a_model(x_kinematics_shape, x_pid_shape)

    if config.multi_gpu:
        model = multi_gpu_model(_model, gpus=config.num_gpus)
    else:
        model = _model

    # TODO config should have these information.
    loss = 'binary_crossentropy'
    optimizer = optimizers.Adam(lr=config.lr)
    metric_list = ['accuracy']

    model.compile(
        loss=loss,
        optimizer=optimizer,
        metrics=metric_list)

    if config.verbose:
        model.summary()

    #######################################
    # Metric bookkeeping
    ###########################################

    meter = Meter(
        name_list=["step", "lr",
                   "train_loss", "dijet_loss", "zjet_loss",
                   "train_acc", "dijet_acc", "zjet_acc"],
        dpath=log_dir.validation.path)

    #######################################
    # Training with validation
    #######################################
    start_message = "TRAINING START"
    print("$" * (len(start_message) + 4))
    print("$ {} $".format(start_message))
    print("$" * (len(start_message) + 4))

    step = 0
    for epoch in range(config.num_epochs):
        print("Epoch [{epoch}/{num_epochs}]".format(epoch=(epoch+1), num_epochs=config.num_epochs))

        for train_batch in train_iter:
            #########################################
            # Validation
            ################################################
            # Also validate on save steps so the checkpoint filename below
            # always has fresh dijet_loss / dijet_acc values.
            if step % config.valid_freq == 0 or step % config.save_freq == 0:
                # NOTE(review): ``.next()`` is Python-2 iterator style; it
                # only works on Python 3 if the project iterator class defines
                # a ``next`` method explicitly — confirm before porting.
                valid_dj_batch = valid_dijet_iter.next()
                valid_zj_batch = valid_zjet_iter.next()

                train_loss, train_acc = model.test_on_batch(
                    x=[train_batch.x_kinematics, train_batch.x_pid],
                    y=train_batch.y)

                dijet_loss, dijet_acc = model.test_on_batch(
                    x=[valid_dj_batch.x_kinematics, valid_dj_batch.x_pid],
                    y=valid_dj_batch.y)

                zjet_loss, zjet_acc = model.test_on_batch(
                    x=[valid_zj_batch.x_kinematics, valid_zj_batch.x_pid],
                    y=valid_zj_batch.y)

                print("Step [{step}/{total_step}]".format(step=step, total_step=total_step))
                print("  Training:\n\tLoss {:.3f} | Acc. {:.3f}".format(train_loss, train_acc))
                print("  Validation on Dijet\n\tLoss {:.3f} | Acc. {:.3f}".format(dijet_loss, dijet_acc))
                print("  Validation on Z+jet\n\tLoss {:.3f} | Acc. {:.3f}".format(zjet_loss,zjet_acc))
                # print("  LR: {:.5f}".format(K.get_value(model.optimizer.lr)))

                meter.append({
                    "step": step, "lr": K.get_value(model.optimizer.lr),
                    "train_loss": train_loss, "dijet_loss": dijet_loss, "zjet_loss": zjet_loss,
                    "train_acc": train_acc, "dijet_acc": dijet_acc, "zjet_acc": zjet_acc})

            # Save model
            if (step != 0) and (step % config.save_freq == 0):
                filepath = os.path.join(
                    log_dir.saved_models.path,
                    "model_step-{step:06d}_loss-{loss:.3f}_acc-{acc:.3f}.h5".format(
                        step=step, loss=dijet_loss, acc=dijet_acc))
                # Save the single-device model, not the multi-GPU wrapper.
                _model.save(filepath)

            # Train on batch
            step += 1
            model.train_on_batch(
                x=[train_batch.x_kinematics, train_batch.x_pid],
                y=train_batch.y)
            # new_lr = np.power(step, -0.5)
            #K.set_value(_model.optimizer.lr, new_lr)


        ###############################
        # On Epoch End
        ###########################

    #############################
    # Final checkpoint & summary plots
    #############################
    filepath = os.path.join(log_dir.saved_models.path, "model_final.h5")
    _model.save(filepath)

    print("Training is over! :D")

    meter.add_plot(
        x="step",
        ys=[("train_loss", "Train/Dijet"),
            ("dijet_loss", "Validation/Dijet"),
            ("zjet_loss", "Validation/Z+jet")],
        title="Loss(CrossEntropy)", xlabel="Step", ylabel="Loss")

    meter.add_plot(
        x="step",
        ys=[("train_acc", "Train/Dijet"),
            ("dijet_acc", "Validation/Dijet"),
            ("zjet_acc", "Validation/Z+jet")],
        title="Accuracy", xlabel="Step", ylabel="Acc.")


    meter.finish()
    config.finish()

    return log_dir