Example #1
def train(model, data):
    x, y = data
    # Split the data into a training set and a validation set
    train_x, valid_x, train_y, valid_y = train_test_split(x,
                                                          y,
                                                          test_size=0.2,
                                                          shuffle=True)
    # Set up checkpoint callbacks
    callbacks_list = [
        keras.callbacks.ModelCheckpoint(filepath=weight_path,
                                        monitor='loss',
                                        save_best_only=True),
        keras.callbacks.LearningRateScheduler(schedule),
    ]
    # Compile the model
    model.compile(optimizer=keras.optimizers.Adam(lr=lr), loss=total_loss)
    # Initialize Lookahead
    lookahead = Lookahead(k=5, alpha=0.5)
    # Inject it into the model
    lookahead.inject(model)
    # Start time
    start_time = time()
    # Train the model
    if not data_augmentation:
        print("不使用数据分割")
        history = model.fit(train_x,
                            train_y,
                            epochs=epoch,
                            batch_size=batch_size,
                            validation_data=(valid_x, valid_y),
                            verbose=1,
                            callbacks=callbacks_list)
    else:
        print("使用数据分割")
        history = model.fit_generator(
            generator=data_generator(train_x, train_y, batch_size),
            steps_per_epoch=(len(train_x) + batch_size - 1) // batch_size,
            epochs=epoch,
            verbose=1,
            callbacks=callbacks_list,
            validation_data=data_generator(valid_x, valid_y, batch_size),
            validation_steps=(len(valid_x) + batch_size - 1) // batch_size)
    model.save(weight_path)  # Save the model

    # Elapsed time
    duration = time() - start_time
    print("Train Finished takes:", "{:.2f} h".format(duration / 3600.0))
    plt.plot(history.history['loss'], label='train')
    plt.plot(history.history['val_loss'], label='valid')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='upper right')
    plt.show()
    return model
def model_fn(objective, optimizer, metrics):
    base_model = efn.EfficientNetB4(
        include_top=False,
        # base_model = seresnext50(include_top=False,
        # base_model = xception(include_top=False,
        # base_model = densenet201(include_top=False,
        # base_model = inceptionresnetv2(include_top=False,
        input_shape=(input_size, input_size, 3),
        classes=num_classes,
        weights='imagenet',
    )
    x = base_model.output
    x = GlobalAveragePooling2D()(x)
    predictions = Dense(num_classes, activation='softmax')(x)
    model1 = Model(inputs=base_model.input, outputs=predictions)
    #     model2 = multi_gpu_model(model1, gpus=3)
    #     model2 = model1
    model1.compile(loss=objective, optimizer=optimizer, metrics=metrics)
    lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
    lookahead.inject(model1)  # Inject into the model
    model1.summary()
    return model1
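Example #1 calls two helpers that are not shown in the snippet: `data_generator` (passed to `fit_generator`) and `schedule` (passed to `LearningRateScheduler`). A minimal sketch of what they might look like; the exact behavior and parameters are assumptions, not taken from the original project:

import numpy as np

def data_generator(x, y, batch_size):
    # Hypothetical helper: yield shuffled (x, y) batches indefinitely,
    # as Keras fit_generator expects.
    n = len(x)
    while True:
        idx = np.random.permutation(n)
        for start in range(0, n, batch_size):
            batch = idx[start:start + batch_size]
            yield x[batch], y[batch]

def schedule(epoch, lr=1e-3):
    # Hypothetical learning-rate schedule: decay the rate by 10x every 30 epochs.
    return lr * (0.1 ** (epoch // 30))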
Example #3
def nvidia(optimizer, source_path, train_generator, validation_generator, train_epochs, \
    num_train_samples, num_validation_samples, batch_size, conv_dropout, fc_dropout):
    '''
    NVIDIA end-to-end model from the paper:
        https://images.nvidia.com/content/tegra/automotive/images/2016/solutions/pdf/end-to-end-dl-using-px.pdf
    '''
    model = Sequential()
    # Layer0: normalization layer
    model.add(Lambda(lambda x: (x / 255.0) - 0.5, input_shape=(64, 64, 3)))
    # Layer1: Convolutional feature map 24@31x98
    model.add(Convolution2D(24, 5, 5, activation='relu', subsample=(2, 2)))
    model.add(Dropout(conv_dropout))
    # Layer2: Convolutional feature map 36@14x47
    model.add(Convolution2D(36, 5, 5, activation='relu', subsample=(2, 2)))
    model.add(Dropout(conv_dropout))
    # Layer3: Convolutional feature map 48@5x22
    model.add(Convolution2D(48, 5, 5, activation='relu', subsample=(2, 2)))
    model.add(Dropout(conv_dropout))
    # Layer4: Convolutional feature map 64@3x20
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(Dropout(conv_dropout))
    # Layer5: Convolutional feature map 64@1x18
    model.add(Convolution2D(64, 3, 3, activation='relu'))
    model.add(Dropout(conv_dropout))
    # FC1
    model.add(Flatten())
    model.add(Dense(1164, activation='relu'))
    model.add(Dropout(fc_dropout))
    # FC2
    model.add(Dense(100, activation='relu'))
    model.add(Dropout(fc_dropout))
    # FC3
    model.add(Dense(50, activation='relu'))
    model.add(Dropout(fc_dropout))
    # FC4
    model.add(Dense(10, activation='relu'))
    # Output layer
    model.add(Dense(1))

    model.summary()

    if optimizer == 'adam':
        '''
        Default Adam optimizer;
        works for trace 1.
        '''
        model.compile(loss='mse', optimizer='adam')

    if optimizer == 'sgd':
        '''
        SGD optimizer;
        works for trace 2 with a 0.005 learning rate.
        '''
        sgd = optimizers.SGD(lr=0.005, decay=1e-6, momentum=0.9, nesterov=True)
        model.compile(loss='mse', optimizer=sgd)

    if optimizer == 'lookahead':
        '''
        Lookahead optimizer.
        Paper: https://arxiv.org/abs/1907.08610
        Keras implementation: https://github.com/bojone/keras_lookahead
        '''
        model.compile(optimizer=optimizers.Adam(1e-3),
                      loss='mse')  # any base optimizer can be used
        lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
        lookahead.inject(model)  # Inject into the model

    history_object = model.fit_generator(
        train_generator,
        steps_per_epoch=np.ceil(num_train_samples / batch_size),
        validation_data=validation_generator,
        validation_steps=np.ceil(num_validation_samples / batch_size),
        epochs=train_epochs,
        verbose=1)

    model.save('model.h5')
    print('nvidia-model-epoch{}-{}-{}.h5'.format(source_path, train_epochs,
                                                 optimizer))

    plot_loss(history_object)
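Example #3 ends by calling a `plot_loss` helper that is not defined in the snippet. A minimal sketch, assuming it mirrors the loss plotting done in Example #1:

import matplotlib.pyplot as plt

def plot_loss(history_object):
    # Hypothetical helper: plot training and validation loss from a Keras History object.
    plt.plot(history_object.history['loss'], label='train')
    plt.plot(history_object.history['val_loss'], label='valid')
    plt.xlabel('epoch')
    plt.ylabel('loss')
    plt.legend(loc='upper right')
    plt.show()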
Example #4
    t1 = time()

    dataset = Dataset(args.path + args.dataset, k)
    train, user_review_fea, item_review_fea, testRatings = dataset.trainMatrix, dataset.user_review_fea, \
                                                           dataset.item_review_fea, dataset.testRatings
    num_users, num_items = train.shape
    print("Load data done [%.1f s]. #user=%d, #item=%d, #train=%d, #test=%d" %
          (time() - t1, num_users, num_items, train.nnz, len(testRatings)))

    # Build model
    model = get_model(num_users, num_items, k, num_factors, regs)
    if learner.lower() == "adagrad":
        model.compile(optimizer=Adagrad(lr=learning_rate),
                      loss="mean_squared_error")
        lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
        lookahead.inject(model)  # Inject into the model
    elif learner.lower() == "rmsprop":
        model.compile(optimizer=RMSprop(lr=learning_rate),
                      loss="mean_squared_error")
        lookahead = Lookahead(k=5, alpha=0.5)
        lookahead.inject(model)
    elif learner.lower() == "adam":
        model.compile(optimizer=Adam(lr=learning_rate),
                      loss="mean_squared_error")
        lookahead = Lookahead(k=5, alpha=0.5)
        lookahead.inject(model)
    else:
        model.compile(optimizer=SGD(lr=learning_rate),
                      loss="mean_squared_error")
        lookahead = Lookahead(k=5, alpha=0.5)
        lookahead.inject(model)
def main():
    # parser config
    config_file = "./config.ini"
    cp = ConfigParser()
    cp.read(config_file)

    # default config
    output_dir = cp["DEFAULT"].get("output_dir")
    image_source_dir = cp["DEFAULT"].get("image_source_dir")
    base_model_name = cp["DEFAULT"].get("base_model_name")
    class_names = cp["DEFAULT"].get("class_names").split(",")

    # train config
    use_base_model_weights = cp["TRAIN"].getboolean("use_base_model_weights")
    use_trained_model_weights = cp["TRAIN"].getboolean(
        "use_trained_model_weights")
    use_best_weights = cp["TRAIN"].getboolean("use_best_weights")
    output_weights_name = cp["TRAIN"].get("output_weights_name")
    epochs = cp["TRAIN"].getint("epochs")
    batch_size = cp["TRAIN"].getint("batch_size")
    initial_learning_rate = cp["TRAIN"].getfloat("initial_learning_rate")
    generator_workers = cp["TRAIN"].getint("generator_workers")
    image_dimension = cp["TRAIN"].getint("image_dimension")
    train_steps = cp["TRAIN"].get("train_steps")
    patience_reduce_lr = cp["TRAIN"].getint("patience_reduce_lr")
    min_lr = cp["TRAIN"].getfloat("min_lr")
    validation_steps = cp["TRAIN"].get("validation_steps")
    positive_weights_multiply = cp["TRAIN"].getfloat(
        "positive_weights_multiply")
    dataset_csv_dir = cp["TRAIN"].get("dataset_csv_dir")
    # if previously trained weights are used, never re-split
    if use_trained_model_weights:
        # resuming mode
        print("** use trained model weights **")
        # load training status for resuming
        training_stats_file = os.path.join(output_dir, ".training_stats.json")
        if os.path.isfile(training_stats_file):
            # TODO: add loading previous learning rate?
            training_stats = json.load(open(training_stats_file))
        else:
            training_stats = {}
    else:
        # start over
        training_stats = {}

    show_model_summary = cp["TRAIN"].getboolean("show_model_summary")
    # end parser config

    # check output_dir, create it if not exists
    if not os.path.isdir(output_dir):
        os.makedirs(output_dir)

    running_flag_file = os.path.join(output_dir, ".training.lock")
    if os.path.isfile(running_flag_file):
        raise RuntimeError("A process is running in this directory!!!")
    else:
        open(running_flag_file, "a").close()

    try:
        print(f"backup config file to {output_dir}")
        shutil.copy(config_file,
                    os.path.join(output_dir,
                                 os.path.split(config_file)[1]))

        datasets = ["train", "dev", "test"]
        for dataset in datasets:
            shutil.copy(os.path.join(dataset_csv_dir, f"{dataset}.csv"),
                        output_dir)

        # get train/dev sample counts
        train_counts, train_pos_counts = get_sample_counts(
            output_dir, "train", class_names)
        dev_counts, _ = get_sample_counts(output_dir, "dev", class_names)

        # compute steps
        if train_steps == "auto":
            train_steps = int(train_counts / batch_size)
        else:
            try:
                train_steps = int(train_steps)
            except ValueError:
                raise ValueError(f"""
                train_steps: {train_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** train_steps: {train_steps} **")

        if validation_steps == "auto":
            validation_steps = int(dev_counts / batch_size)
        else:
            try:
                validation_steps = int(validation_steps)
            except ValueError:
                raise ValueError(f"""
                validation_steps: {validation_steps} is invalid,
                please use 'auto' or integer.
                """)
        print(f"** validation_steps: {validation_steps} **")

        # compute class weights
        print("** compute class weights from training data **")
        class_weights = get_class_weights(
            train_counts,
            train_pos_counts,
            multiply=positive_weights_multiply,
        )
        print("** class_weights **")
        print(class_weights)

        print("** load model **")
        if use_trained_model_weights:
            if use_best_weights:
                model_weights_file = os.path.join(
                    output_dir, f"best_{output_weights_name}")
            else:
                model_weights_file = os.path.join(output_dir,
                                                  output_weights_name)
        else:
            model_weights_file = None

        model_factory = ModelFactory()
        model = model_factory.get_model(
            class_names,
            model_name=base_model_name,
            use_base_weights=use_base_model_weights,
            weights_path=model_weights_file,
            input_shape=(image_dimension, image_dimension, 3))

        if show_model_summary:
            print(model.summary())

        print("** create image generators **")
        train_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "train.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=train_steps,
        )
        validation_sequence = AugmentedImageSequence(
            dataset_csv_file=os.path.join(output_dir, "dev.csv"),
            class_names=class_names,
            source_image_dir=image_source_dir,
            batch_size=batch_size,
            target_size=(image_dimension, image_dimension),
            augmenter=augmenter,
            steps=validation_steps,
            shuffle_on_epoch_end=False,
        )

        output_weights_path = os.path.join(output_dir, output_weights_name)
        print(f"** set output weights path to: {output_weights_path} **")

        print("** check multiple gpu availability **")
        gpus = len(os.getenv("CUDA_VISIBLE_DEVICES", "1").split(","))
        if gpus > 1:
            print(f"** multi_gpu_model is used! gpus={gpus} **")
            model_train = multi_gpu_model(model, gpus)
            # FIXME: currently (Keras 2.1.2) checkpoint doesn't work with multi_gpu_model
            checkpoint = MultiGPUModelCheckpoint(
                filepath=output_weights_path,
                base_model=model,
            )
        else:
            model_train = model
            checkpoint = ModelCheckpoint(
                output_weights_path,
                save_weights_only=True,
                save_best_only=True,
                verbose=1,
            )

        print("** compile model with class weights **")
        #model.compile(RAdam(), loss='mse')
        #optimizer = Adam(lr=initial_learning_rate)
        optimizer = RAdam(lr=initial_learning_rate)
        model_train.compile(optimizer=optimizer, loss=[focal_loss])
        lookahead = Lookahead(k=5, alpha=0.5)  # Initialize Lookahead
        lookahead.inject(model_train)  # Inject into the model
        auroc = MultipleClassAUROC(
            sequence=validation_sequence,
            class_names=class_names,
            weights_path=output_weights_path,
            stats=training_stats,
            workers=generator_workers,
        )
        callbacks = [
            checkpoint,
            TensorBoard(log_dir=os.path.join(output_dir, "logs"),
                        batch_size=batch_size),
            ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1,
                              patience=patience_reduce_lr,
                              verbose=1,
                              mode="min",
                              min_lr=min_lr),
            auroc,
            EarlyStopping(monitor='val_loss',
                          mode='min',
                          verbose=1,
                          patience=25),
        ]

        print("** start training **")
        history = model_train.fit_generator(
            generator=train_sequence,
            steps_per_epoch=train_steps,
            epochs=epochs,
            validation_data=validation_sequence,
            validation_steps=validation_steps,
            callbacks=callbacks,
            class_weight=class_weights,
            workers=generator_workers,
            shuffle=False,
        )

        # dump history
        print("** dump history **")
        with open(os.path.join(output_dir, "history.pkl"), "wb") as f:
            pickle.dump({
                "history": history.history,
                "auroc": auroc.aurocs,
            }, f)
        print("** done! **")

    finally:
        os.remove(running_flag_file)
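Example #4 compiles the model with a `focal_loss` function that is not shown. A minimal sketch of a binary focal loss (Lin et al., 2017) written against the Keras backend; the exact formulation and the gamma/alpha values used by the original project are assumptions:

from keras import backend as K

def focal_loss(y_true, y_pred, gamma=2.0, alpha=0.25):
    # Hypothetical stand-in for the project's focal_loss: element-wise binary focal loss.
    eps = K.epsilon()
    y_pred = K.clip(y_pred, eps, 1.0 - eps)
    pt = y_true * y_pred + (1.0 - y_true) * (1.0 - y_pred)        # p_t
    weight = y_true * alpha + (1.0 - y_true) * (1.0 - alpha)      # alpha_t
    return K.mean(-weight * K.pow(1.0 - pt, gamma) * K.log(pt))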