Example 1
 def __init__(self, img_paths, labels, batch_size, img_size, use, preprocess_input):
     assert len(img_paths) == len(labels), "len(img_paths) must equal len(labels)"
     assert img_size[0] == img_size[1], "img_size[0] must equal img_size[1]"
     self.x_y = np.hstack((np.array(img_paths).reshape(len(img_paths), 1), np.array(labels)))
     self.batch_size = batch_size
     self.img_size = img_size
     self.use = use
     self.preprocess_input = preprocess_input
     self.eraser = get_random_eraser(s_h=0.3, pixel_level=True)
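
All of the snippets in this collection treat get_random_eraser as a factory: it is called once with the Random Erasing hyper-parameters and returns a function that erases one random rectangle from a single image, which is why it can be stored on self or passed as a Keras preprocessing_function. For reference, here is a minimal sketch of such a factory following the interface used throughout (p, s_l, s_h, r_1, r_2, v_l, v_h, pixel_level); the canonical implementation is the one cloned from yu4u/cutout-random-erasing in Example 24 and may differ in details:

import numpy as np

def get_random_eraser(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1 / 0.3,
                      v_l=0, v_h=255, pixel_level=False):
    def eraser(input_img):
        # With probability 1 - p, leave the image untouched.
        if np.random.rand() > p:
            return input_img

        img_h, img_w = input_img.shape[0], input_img.shape[1]

        # Sample an erasing rectangle: area fraction in [s_l, s_h],
        # aspect ratio in [r_1, r_2]; retry until it fits inside the image.
        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w
            r = np.random.uniform(r_1, r_2)
            w = int(np.sqrt(s / r))
            h = int(np.sqrt(s * r))
            left = np.random.randint(0, img_w)
            top = np.random.randint(0, img_h)
            if left + w <= img_w and top + h <= img_h:
                break

        # Fill the rectangle with per-pixel noise (pixel_level=True) or a single value.
        if pixel_level:
            c = np.random.uniform(v_l, v_h, (h, w) + input_img.shape[2:])
        else:
            c = np.random.uniform(v_l, v_h)
        input_img[top:top + h, left:left + w] = c
        return input_img

    return eraser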
Example 2
def multi_erase(x):
    eraser = get_random_eraser(p=1.0,
                               s_l=0.02,
                               s_h=0.4,
                               r_1=0.3,
                               r_2=1 / 0.3,
                               v_l=0,
                               v_h=255,
                               pixel_level=True)
    x = eraser(x)
    eraser = get_random_eraser(p=0.8,
                               s_l=0.02,
                               s_h=0.4,
                               r_1=0.3,
                               r_2=1 / 0.3,
                               v_l=0,
                               v_h=255,
                               pixel_level=True)
    x = eraser(x)
    return x
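
Chaining two erasers like this simply raises the chance (and count) of erased patches per image; multi_erase can then be plugged in wherever the single-eraser examples below use preprocessing_function. A minimal sketch, assuming Keras' ImageDataGenerator as in the other snippets:

from keras.preprocessing.image import ImageDataGenerator

# multi_erase is applied to each image after the other augmentations.
datagen = ImageDataGenerator(horizontal_flip=True,
                             preprocessing_function=multi_erase)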
Example 3
 def __init__(self, prefix, labels, batch_size):
     self.name = prefix + 'dataset'
     # Load preprocessed data
     self.X_train = np.load(prefix + 'X_train.npy')
     self.y_train = keras.utils.to_categorical(
         np.load(prefix + 'y_train.npy'))
     self.X_valid = np.load(prefix + 'X_valid.npy')
     self.y_valid = keras.utils.to_categorical(
         np.load(prefix + 'y_valid.npy'))
     self.X_test = np.load(prefix + 'X_test.npy')
     self.y_test = keras.utils.to_categorical(np.load(prefix +
                                                      'y_test.npy'))
     # Make label from/to class converter
     self.labels = labels
     self.label2int = {l: i for i, l in enumerate(labels)}
     self.int2label = {i: l for i, l in enumerate(labels)}
     self.num_classes = len(self.labels)
     # Normalize
     max_amplitude = np.max(
         np.abs(np.vstack([self.X_train, self.X_valid, self.X_test])))
     self.X_train = self.X_train / max_amplitude
     self.X_valid = self.X_valid / max_amplitude
     self.X_test = self.X_test / max_amplitude
     # Add dimension [:, features, timestep] -> [:, features, timestep, 1]
     self.X_train = self.X_train[..., np.newaxis]
     self.X_valid = self.X_valid[..., np.newaxis]
     self.X_test = self.X_test[..., np.newaxis]
     # Make data generators
     datagen = ImageDataGenerator(
         featurewise_center=True,
         featurewise_std_normalization=True,
         rotation_range=0,
         width_shift_range=0.4,
         height_shift_range=0,
         horizontal_flip=True,
         preprocessing_function=get_random_eraser(
             v_l=np.min(self.X_train),
             v_h=np.max(self.X_train))  # RANDOM ERASER
     )
     datagen.fit(np.r_[self.X_train, self.X_valid, self.X_test])
     test_datagen = ImageDataGenerator(
         featurewise_center=datagen.featurewise_center,
         featurewise_std_normalization=datagen.featurewise_std_normalization
     )
     test_datagen.mean, test_datagen.std = datagen.mean, datagen.std
     self.datagen = datagen
     self.test_datagen = test_datagen
     self.batch_size = batch_size
     self.reset_generators()
Example 4
    def __init__(self, label_file, input_size=224):
        '''
        img_dir: image directory; img_dir + img_name.jpg forms the full path of each image
        '''
        # absolute paths of all images
        with open(label_file, 'r') as f:
            # format of label_file: (image_path image_label)
            self.imgs = list(map(lambda line: line.strip().split(' '), f))

        # initialization of the related preprocessing
        #  self.transforms=transform
        self.img_aug = True

        self.transform = get_train_transform(size=cfg.INPUT_SIZE)
        self.eraser = get_random_eraser(s_h=0.1, pixel_level=True)
        self.input_size = cfg.INPUT_SIZE
Example 5
def get_datagen(dataset, batch_size, use_random_eraser=True, use_mixup=True):
    datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.6,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        preprocessing_function=(get_random_eraser(v_l=np.min(dataset.X_train),
                                                  v_h=np.max(dataset.X_train))
                                if use_random_eraser else None)
    )
    test_datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset
        featurewise_std_normalization=True,  # divide inputs by std of the dataset
    )
    datagen.fit(np.r_[dataset.X_train, dataset.X_valid, dataset.X_test])
    test_datagen.mean, test_datagen.std = datagen.mean, datagen.std

    train_flow = datagen.flow(dataset.X_train,
                              dataset.y_train,
                              batch_size=batch_size)
    if use_mixup:
        train_flow = MixupGenerator(dataset.X_train,
                                    dataset.y_train,
                                    alpha=1.0,
                                    batch_size=batch_size,
                                    datagen=datagen)()
    valid_flow = test_datagen.flow(dataset.X_valid,
                                   dataset.y_valid,
                                   shuffle=False)
    y_test_just_for_api = keras.utils.to_categorical(
        np.ones(len(dataset.X_test)))
    test_flow = test_datagen.flow(dataset.X_test,
                                  y_test_just_for_api,
                                  shuffle=False)
    return train_flow, valid_flow, test_flow, datagen, test_datagen
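
The three returned flows are meant to be handed straight to Keras; a minimal sketch of the typical wiring, assuming a dataset object shaped like the one in Example 3 and an already compiled model (all names here are illustrative):

train_flow, valid_flow, test_flow, datagen, test_datagen = get_datagen(
    dataset, batch_size=32, use_random_eraser=True, use_mixup=True)

model.fit_generator(train_flow,
                    steps_per_epoch=len(dataset.X_train) // 32,
                    validation_data=valid_flow,
                    validation_steps=len(dataset.X_valid) // 32,
                    epochs=50)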
Example 6
def prepare_generator(df, x_col, y_col, width, height, batch_size, test_size,
                      classes):
    x_train, x_val, y_train, y_val = train_test_split(df[x_col],
                                                      df[y_col],
                                                      test_size=test_size,
                                                      shuffle=True,
                                                      random_state=SEED,
                                                      stratify=df[y_col])
    cutout = get_random_eraser(p=0.9, v_l=0, v_h=1, pixel_level=True)
    train_gen = ImageDataGenerator(rotation_range=45,
                                   width_shift_range=.15,
                                   height_shift_range=.15,
                                   horizontal_flip=True,
                                   vertical_flip=True,
                                   zoom_range=0.5,
                                   preprocessing_function=cutout,
                                   rescale=1. / 255)
    train_generator = train_gen.flow_from_dataframe(
        pd.concat([x_train, y_train], axis=1),
        x_col=x_col,
        y_col=y_col,
        target_size=(width, height),
        batch_size=batch_size,
        class_mode='categorical',
        subset='training',
        classes=classes)
    valid_gen = ImageDataGenerator(rescale=1. / 255)
    valid_generator = valid_gen.flow_from_dataframe(pd.concat([x_val, y_val],
                                                              axis=1),
                                                    x_col=x_col,
                                                    y_col=y_col,
                                                    target_size=(width,
                                                                 height),
                                                    batch_size=batch_size,
                                                    class_mode='categorical',
                                                    subset='training',
                                                    classes=classes)
    return train_generator, valid_generator
Example 7
def getdata(train_path, val_path, test_path):
    # create a data generator

    image_size = config.IMAGE_SIZE

    datagen_batch_size = config.batch_size

    datagen = ImageDataGenerator(horizontal_flip=True,
                                 width_shift_range=0.2,
                                 height_shift_range=0.2,
                                 zoom_range=[0.5, 1.0],
                                 shear_range=0.2,
                                 preprocessing_function=get_random_eraser(
                                     v_l=0, v_h=255))

    # test data shouldn't be augmented

    test_datagen = ImageDataGenerator()

    train_it = datagen.flow_from_directory(train_path,
                                           class_mode='categorical',
                                           batch_size=datagen_batch_size,
                                           target_size=(image_size,
                                                        image_size))
    # load and iterate validation dataset
    val_it = datagen.flow_from_directory(val_path,
                                         class_mode='categorical',
                                         batch_size=datagen_batch_size,
                                         target_size=(image_size, image_size))
    # load and iterate test dataset
    test_it = test_datagen.flow_from_directory(test_path,
                                               class_mode='categorical',
                                               batch_size=datagen_batch_size,
                                               target_size=(image_size,
                                                            image_size))

    return train_it, val_it, test_it
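
A minimal sketch of how these iterators would be consumed (the directory paths and the compiled model are illustrative assumptions):

train_it, val_it, test_it = getdata('data/train', 'data/val', 'data/test')
model.fit_generator(train_it,
                    steps_per_epoch=len(train_it),
                    validation_data=val_it,
                    validation_steps=len(val_it),
                    epochs=30)
score = model.evaluate_generator(test_it, steps=len(test_it))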
Example 8
p = {
    # your parameter boundaries come here
    #'image_size': [144, 196, 240],
    #'model': ['inception_resnet_v2', 'nasnet'],
    #'learning_rate': [0.1, 0.01],
    #'decay_factor': [1, 2, 5, 10, 100],
    #'momentum': [0.9, 0.95, 0.99],
    'deep_layers': [2, 3, 4],
    'freeze_layers': [339, 399]
}

eraser = get_random_eraser(p=0.8,
                           s_l=0.02,
                           s_h=0.4,
                           r_1=0.3,
                           r_2=1 / 0.3,
                           v_l=0,
                           v_h=255,
                           pixel_level=True)


def preprocess(x):
    x = eraser(x)
    x = preprocess_input(x)
    return x
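
Example 8 is cut off before preprocess is used; following the other examples, the usual wiring is to hand it to an ImageDataGenerator so that each training image is first erased and then normalised by preprocess_input. A minimal sketch (the generator and flow arguments are illustrative):

from keras.preprocessing.image import ImageDataGenerator

train_datagen = ImageDataGenerator(horizontal_flip=True,
                                   preprocessing_function=preprocess)
train_flow = train_datagen.flow_from_directory('data/train',
                                               target_size=(224, 224),
                                               batch_size=32,
                                               class_mode='categorical')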


def input_model(x_train, y_train, x_val, y_val, params):
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    session = tf.Session(config=config)
Example 9
batch_size_test = 32
epochs = 100
aeons = 1000
alpha = 1

train_data = np.copy(X)  #np.copy(X[tch[:, j]])
val_data = np.copy(Xv)  #np.copy(Xv[tchv[:, j]])
tst_data = np.copy(X2)  #np.copy(X2[tchv[:, j]])

# create data generator for mixup and random erasing for every batch
datagen = keras.preprocessing.image.ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    width_shift_range=7,
    height_shift_range=3,
    preprocessing_function=get_random_eraser(v_l=np.min(train_data),
                                             v_h=np.max(train_data)))
test_datagen = keras.preprocessing.image.ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True
)
datagen.fit(np.r_[train_data])
test_datagen.fit(np.r_[train_data])
training_generator = MixupGenerator(train_data,
                                    tch,
                                    batch_size=batch_size,
                                    alpha=alpha,
                                    datagen=datagen)

# compile model
input, model_output, z_mean, z_log_var = vae_def(feat_length=60, nceps=40)
vae = keras.Model(inputs=[input], outputs=[model_output])
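
The snippet stops before training; in the other examples the MixupGenerator instance is called to obtain a Python generator and handed to fit_generator. A minimal sketch under that assumption (it presumes vae has already been compiled with an appropriate VAE loss and that batch_size is defined alongside batch_size_test):

vae.fit_generator(training_generator(),
                  steps_per_epoch=len(train_data) // batch_size,
                  epochs=epochs,
                  verbose=1)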
Example 10
def main():
    args = get_args()
    input_path = args.input
    batch_size = args.batch_size
    nb_epochs = args.nb_epochs
    depth = args.depth
    k = args.width
    validation_split = args.validation_split
    use_augmentation = args.aug

    logging.debug("Loading data...")
    image, gender, age, _, image_size, _ = load_data(input_path)
    X_data = image
    y_data_g = np_utils.to_categorical(gender, 2)
    y_data_a = np_utils.to_categorical(age, 101)

    model = WideResNet(image_size, depth=depth, k=k)()
    sgd = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd, loss=["categorical_crossentropy", "categorical_crossentropy"],
                  metrics=['accuracy'])

    logging.debug("Model summary...")
    model.count_params()
    model.summary()

    logging.debug("Saving model...")
    mk_dir("models")
    with open(os.path.join("models", "WRN_{}_{}.json".format(depth, k)), "w") as f:
        f.write(model.to_json())

    mk_dir("checkpoints")
    callbacks = [LearningRateScheduler(schedule=Schedule(nb_epochs)),
                 ModelCheckpoint("checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                                 monitor="val_loss",
                                 verbose=1,
                                 save_best_only=True,
                                 mode="auto")
                 ]

    logging.debug("Running training...")

    data_num = len(X_data)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    X_data = X_data[indexes]
    y_data_g = y_data_g[indexes]
    y_data_a = y_data_a[indexes]
    train_num = int(data_num * (1 - validation_split))
    X_train = X_data[:train_num]
    X_test = X_data[train_num:]
    y_train_g = y_data_g[:train_num]
    y_test_g = y_data_g[train_num:]
    y_train_a = y_data_a[:train_num]
    y_test_a = y_data_a[train_num:]

    if use_augmentation:
        datagen = ImageDataGenerator(
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True,
            preprocessing_function=get_random_eraser(v_l=0, v_h=255))
        training_generator = MixupGenerator(X_train, [y_train_g, y_train_a], batch_size=batch_size, alpha=0.2,
                                            datagen=datagen)()
        hist = model.fit_generator(generator=training_generator,
                                   steps_per_epoch=train_num // batch_size,
                                   validation_data=(X_test, [y_test_g, y_test_a]),
                                   epochs=nb_epochs, verbose=1,
                                   callbacks=callbacks)
    else:
        hist = model.fit(X_train, [y_train_g, y_train_a], batch_size=batch_size, epochs=nb_epochs, callbacks=callbacks,
                         validation_data=(X_test, [y_test_g, y_test_a]))

    logging.debug("Saving weights...")
    model.save_weights(os.path.join("models", "WRN_{}_{}.h5".format(depth, k)), overwrite=True)
    pd.DataFrame(hist.history).to_hdf(os.path.join("models", "history_{}_{}.h5".format(depth, k)), "history")
Example 11
def train_model_with_2_classes(model, train_X, train_Y, test_X, test_Y):
    train_X = (train_X - 0.1307) / 0.3081
    test_X = (test_X - 0.1307) / 0.3081
    id_list = []

    out_dir = "models/"
    if not os.path.exists(out_dir):
        os.makedirs(out_dir)
    for i in range(2, 3):
        for j in range(8, 10):
            id_list.append([i, j])
            two_classes_id = (train_Y == i) + (train_Y == j)
            trainX = train_X[two_classes_id]
            trainY = train_Y[two_classes_id]

            two_classes_id = (test_Y == i) + (test_Y == j)
            testX = test_X[two_classes_id]
            testY = test_Y[two_classes_id]

            Y_train = np_utils.to_categorical(trainY, nb_classes)
            Y_test = np_utils.to_categorical(testY, nb_classes)


            # Load model
            weights_file = "models/shake_shake"+str(i)+'_'+str(j)+".h5"
            if os.path.exists(weights_file):
                model.load_weights(weights_file)
                print(weights_file,"Model loaded.\r\n")

            lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=0.5,  # reduce the learning rate when the monitored metric stops improving
                                           cooldown=0, patience=20, min_lr=1e-8)

            model_checkpoint = ModelCheckpoint(weights_file, monitor="val_acc", save_best_only=True,
                                               save_weights_only=True, mode='auto')
            earlyStopping = EarlyStopping(monitor='acc', patience=10, verbose=1, mode='auto')
            callbacks = [lr_reducer, model_checkpoint,earlyStopping]

            train_data = ImageDataGenerator(featurewise_center=True,
                                            featurewise_std_normalization=True,
                                            # preprocessing_function=random_crop_image,
                                            preprocessing_function=get_random_eraser(v_l=0, v_h=1),
                                            rotation_range=10,
                                            width_shift_range=5. / 28,
                                            height_shift_range=5. / 28,
                                            horizontal_flip=True)
            validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
            for data in (train_data, validation_data):
                data.fit(trainX)
            model.fit_generator(train_data.flow(trainX, Y_train, batch_size=batch_size),
                            steps_per_epoch=len(trainX) // batch_size,
                            epochs=nb_epoch,
                            callbacks=callbacks,
                            validation_data=validation_data.flow(testX, Y_test, batch_size=batch_size),
                            validation_steps=testX.shape[0] // batch_size, verbose=1)
            # model.fit(trainX, Y_train, batch_size=batch_size, epochs=1, verbose=1, validation_data=(testX, Y_test),
            #           callbacks=callbacks)
            yPreds = model.predict(testX)
            yPred = np.argmax(yPreds, axis=1)
            yTrue = testY

            accuracy = metrics.accuracy_score(yTrue, yPred) * 100
            error = 100 - accuracy
            print(i,'pk',j)
            print("Accuracy : ", accuracy)
            print("Error : ", error)
Example 12
def main():
    start = time.time()

    ap = argparse.ArgumentParser()
    ap.add_argument("-e",
                    "--epochs",
                    required=True,
                    type=int,
                    help="Number of epochs",
                    default=25)
    ap.add_argument("-m",
                    "--model_name",
                    required=True,
                    type=str,
                    help="Imagenet model to train",
                    default="xception")
    ap.add_argument("-b",
                    "--batch_size",
                    required=True,
                    type=int,
                    help="Batch size",
                    default=8)
    ap.add_argument("-im_size",
                    "--image_size",
                    required=True,
                    type=int,
                    help="Batch size",
                    default=224)
    args = ap.parse_args()

    # Training dataset loading
    train_data = np.load("train_data.npy")
    train_label = np.load("train_label.npy")
    encoder = LabelEncoder()
    encoder.fit(train_label)
    encoded_y = encoder.transform(train_label)
    Y = utils.to_categorical(encoded_y)

    print("Dataset Loaded...")

    # Train and validation split
    trainX, valX, trainY, valY = train_test_split(train_data,
                                                  Y,
                                                  test_size=0.1,
                                                  shuffle=True,
                                                  random_state=42,
                                                  stratify=Y)
    print(trainX.shape, valX.shape, trainY.shape, valY.shape)

    # Train and validation image data generators
    trainAug = ImageDataGenerator(
        rescale=1.0 / 255.0,
        preprocessing_function=get_random_eraser(p=0.5,
                                                 s_l=0.02,
                                                 s_h=0.4,
                                                 r_1=0.3,
                                                 r_2=1 / 0.3,
                                                 v_l=0,
                                                 v_h=255,
                                                 pixel_level=False),
        rotation_range=30,
        zoom_range=0.15,
        width_shift_range=0.2,
        height_shift_range=0.2,
        shear_range=0.15,
        horizontal_flip=True,
        fill_mode="nearest",
    )

    valAug = ImageDataGenerator(rescale=1.0 / 255.0)

    model = cnn_model(args.model_name, img_size=args.image_size)

    # Number of trainable and non-trainable parameters
    trainable_count = int(
        np.sum([K.count_params(p) for p in set(model.trainable_weights)]))
    non_trainable_count = int(
        np.sum([K.count_params(p) for p in set(model.non_trainable_weights)]))

    print("Total params: {:,}".format(trainable_count + non_trainable_count))
    print("Trainable params: {:,}".format(trainable_count))
    print("Non-trainable params: {:,}".format(non_trainable_count))

    if not exists("./trained_wts"):
        makedirs("./trained_wts")
    if not exists("./training_logs"):
        makedirs("./training_logs")
    if not exists("./plots"):
        makedirs("./plots")

    # Keras callbacks
    model_checkpoint = ModelCheckpoint(
        "trained_wts/" + args.model_name + ".hdf5",
        monitor="val_loss",
        verbose=1,
        save_best_only=True,
        save_weights_only=True,
    )

    stopping = EarlyStopping(monitor="val_loss", patience=10, verbose=0)

    clr = CyclicLR(mode=CLR_METHOD,
                   base_lr=MIN_LR,
                   max_lr=MAX_LR,
                   step_size=STEP_SIZE * (trainX.shape[0] // args.batch_size))
    print("Training is going to start in 3... 2... 1... ")

    # Model Training
    H = model.fit_generator(
        trainAug.flow(trainX, trainY, batch_size=args.batch_size),
        steps_per_epoch=len(trainX) // args.batch_size,
        validation_data=valAug.flow(valX, valY),
        validation_steps=len(valX) // args.batch_size,
        epochs=args.epochs,
        callbacks=[model_checkpoint],
    )

    # plot the training loss and accuracy
    plt.style.use("ggplot")
    plt.figure()
    N = args.epochs
    plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
    plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
    plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
    plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
    plt.title("Training Loss and Accuracy")
    plt.xlabel("Epoch #")
    plt.ylabel("Loss/Accuracy")
    plt.legend(loc="lower left")
    plt.savefig("plots/training_plot.png")

    N = np.arange(0, len(clr.history["lr"]))
    plt.figure()
    plt.plot(N, clr.history["lr"])
    plt.title("Cyclical Learning Rate (CLR)")
    plt.xlabel("Training Iterations")
    plt.ylabel("Learning Rate")
    plt.savefig("plots/cyclic_lr.png")

    end = time.time()
    dur = end - start

    if dur < 60:
        print("Execution Time:", dur, "seconds")
    elif dur > 60 and dur < 3600:
        dur = dur / 60
        print("Execution Time:", dur, "minutes")
    else:
        dur = dur / (60 * 60)
        print("Execution Time:", dur, "hours")
Example 13
 train_datagen = ImageDataGenerator(
     rotation_range=0,
     width_shift_range=0.1,
     height_shift_range=0.1,
     brightness_range=(0.9, 1.1),
     shear_range=0.0,
     zoom_range=0.2,
     channel_shift_range=0.0,
     fill_mode='reflect',
     horizontal_flip=True,
     vertical_flip=False,
     rescale=1 / 255,
     preprocessing_function=get_random_eraser(p=0.8,
                                              s_l=0.02,
                                              s_h=0.4,
                                              r_1=0.3,
                                              r_2=1 / 0.3,
                                              v_l=0,
                                              v_h=255,
                                              pixel_level=True))
 valid_datagen = ImageDataGenerator(rescale=1 / 255)
 train = train_datagen.flow_from_directory(TRAIN_DIR,
                                           target_size=IMAGE_SIZE,
                                           color_mode='rgb',
                                           batch_size=BATCH_SIZE,
                                           interpolation='bicubic')
 valid = valid_datagen.flow_from_directory(VAL_DIR,
                                           target_size=IMAGE_SIZE,
                                           color_mode='rgb',
                                           batch_size=BATCH_SIZE,
                                           interpolation='bicubic')
 class_weights = compute_class_weight('balanced', np.arange(0, N_CLASSES),
Example 14
def train_tf(image_size=64,
             batch_size=128,
             lr=0.001,
             min_lr=0.00001,
             epoch=30,
             logging=True,
             save_model=True,
             save_result_to_csv=True,
             lr_reduce_patience=5,
             lr_reduce_factor=0.7,
             n_fold=5,
             aug_config=None,
             create_model=dense_net_121_model,
             three_channel=False):
    gpus = tf.config.experimental.list_physical_devices('GPU')
    if gpus:
        try:
            # Currently, memory growth needs to be the same across GPUs
            for gpu in gpus:
                tf.config.experimental.set_memory_growth(gpu, True)
            logical_gpus = tf.config.experimental.list_logical_devices('GPU')
            print(len(gpus), "Physical GPUs,", len(logical_gpus),
                  "Logical GPUs")
        except RuntimeError as e:
            # Memory growth must be set before GPUs have been initialized
            print(e)
    else:
        raise NotImplementedError("can't train without a GPU")

    if aug_config is None:
        aug_config = dict()
    augmentations.IMAGE_SIZE = image_size
    # need to set this to train in rtx gpu

    train = pd.read_csv("data/train.csv")
    train["image_path"] = train["image_id"].apply(
        lambda x: f"data/image_{image_size}/{x}.png")
    train.drop(["grapheme", "image_id"], axis=1, inplace=True)

    x = train["image_path"].values
    y_root = pd.get_dummies(train['grapheme_root']).values
    y_vowel = pd.get_dummies(train['vowel_diacritic']).values
    y_consonant = pd.get_dummies(train['consonant_diacritic']).values
    print(f"overall dataset: "
          f"root - {len(np.unique(train['grapheme_root'].values))} "
          f"vowel - {len(np.unique(train['vowel_diacritic'].values))} "
          f"con - {len(np.unique(train['consonant_diacritic'].values))}")

    transformers = []

    if AFFINE in aug_config:
        seq_augmenter = iaa.Sequential([
            iaa.Sometimes(1.0, iaa.Affine(**aug_config[AFFINE])),
        ])
        transformers.append(lambda img: seq_augmenter.augment_image(img))
    if GRID_MASK in aug_config:
        transformers.append(
            lambda img: grid_mask(img, **aug_config[GRID_MASK]))

    if RANDOM_CUTOUT in aug_config:
        transformers.append(
            lambda img: get_random_eraser(**aug_config[RANDOM_CUTOUT])(img))

    if AUGMIX in aug_config:
        transformers.append(lambda img: augmentations.augment_and_mix(
            img, **aug_config[AUGMIX]))
    """
        32, 0.79
        60, 0.58
        61, 0.68
        62, 0.73
        84, 0.80
        37, 0.86
        45, 0.86
        110, 0.87
        122, 0.85
    """

    skf = StratifiedKFold(n_splits=n_fold, shuffle=True)
    for train_idx, test_idx in skf.split(x, train['grapheme_root'].values):
        x_train, x_test = x[train_idx], x[test_idx]
        y_train_root, y_test_root = y_root[train_idx], y_root[test_idx]
        y_train_consonant, y_test_consonant = y_consonant[
            train_idx], y_consonant[test_idx]
        y_train_vowel, y_test_vowel = y_vowel[train_idx], y_vowel[test_idx]

        root_truth = np.argmax(y_test_root, axis=1)
        vowel_truth = np.argmax(y_test_vowel, axis=1)
        con_truth = np.argmax(y_test_consonant, axis=1)

        print(f"train set: "
              f"root - {len(np.unique(np.argmax(y_train_root, axis=1)))} "
              f"vowel - {len(np.unique(np.argmax(y_train_vowel, axis=1)))} "
              f"con - {len(np.unique(np.argmax(y_train_consonant, axis=1)))}")

        print(f"test set: "
              f"root - {len(np.unique(root_truth))} "
              f"vowel - {len(np.unique(vowel_truth))} "
              f"con - {len(np.unique(con_truth))}")
        c = Counter(vowel_truth)
        vowel_test_percentage = [(i, c[i] / len(vowel_truth) * 100.0)
                                 for i, count in c.most_common()]
        print(f"test vowel percentage: {vowel_test_percentage}")
        c = Counter(con_truth)
        con_test_percentage = [(i, c[i] / len(con_truth) * 100.0)
                               for i, count in c.most_common()]
        print(f"test con percentage: {con_test_percentage}")

        input_shape = (image_size, image_size,
                       3) if three_channel else (image_size, image_size, 1)

        train_gen = BengaliImageMixUpGenerator(
            x_train,
            image_size,
            root=y_train_root,
            vowel=y_train_vowel,
            consonant=y_train_consonant,
            batch_size=batch_size,
            mixup=MIXUP in aug_config,
            alpha=aug_config[MIXUP]["alpha"] if MIXUP in aug_config else 0.2,
            transformers=transformers,
            three_channel=three_channel)

        test_gen = BengaliImageGenerator(x_test,
                                         image_size,
                                         root=y_test_root,
                                         vowel=y_test_vowel,
                                         consonant=y_test_consonant,
                                         batch_size=batch_size,
                                         three_channel=three_channel)

        model = create_model(input_shape)
        optimizer = Adam(learning_rate=lr)
        model.compile(optimizer=optimizer,
                      loss='categorical_crossentropy',
                      metrics=['accuracy'])

        callbacks = [
            ReduceLROnPlateau(monitor='val_root_accuracy',
                              patience=lr_reduce_patience,
                              verbose=1,
                              factor=lr_reduce_factor,
                              min_lr=min_lr),
            OnEpochEnd(train_gen),
        ]

        aug_keys = list(aug_config.keys())
        aug_keys.sort(reverse=True)
        if len(aug_keys) == 0:
            aug_keys.append("base")

        if logging:
            current_timestamp = datetime.now().strftime("%Y%m%d-%H%M%S")
            logdir = f"logs/scalars/{image_size}/{create_model.__name__}/{'_'.join(aug_keys)}/{current_timestamp}"
            callbacks.append(TensorBoard(log_dir=logdir))

        if save_model:
            Path(f"model/{create_model.__name__}").mkdir(parents=True,
                                                         exist_ok=True)
            callbacks.append(
                ModelCheckpoint(
                    f"model/{create_model.__name__}/tf_model_{image_size}_{'_'.join(aug_keys)}.h5",
                    monitor='val_root_accuracy',
                    verbose=1,
                    save_best_only=True,
                    mode='max'))

        model.fit(train_gen,
                  epochs=epoch,
                  callbacks=callbacks,
                  validation_data=test_gen)

        prediction = model.predict(test_gen)
        scores = []

        root_prediction = np.argmax(prediction[0], axis=1)
        scores.append(
            recall_score(root_truth, root_prediction, average='macro'))
        # print(classification_report(root_truth, root_prediction))
        vowel_pred = np.argmax(prediction[1], axis=1)
        scores.append(recall_score(vowel_truth, vowel_pred, average='macro'))
        # print(classification_report(vowel_truth, vowel_pred))
        con_pred = np.argmax(prediction[2], axis=1)
        scores.append(recall_score(con_truth, con_pred, average='macro'))
        # print(classification_report(con_truth, con_pred))

        cv_score = np.average(scores, weights=[2, 1, 1])
        print(cv_score)

        if save_result_to_csv:
            info = {
                "model": create_model.__name__,
                "image_size": image_size,
                "batch_size": batch_size,
                "starting_lr": lr,
                "epoch": epoch,
                "lr_reduce_patience": lr_reduce_patience,
                "lr_reduce_factor": lr_reduce_factor,
                "min_lr": min_lr,
                "augmentation": json.dumps(aug_config),
                "cv_score": cv_score,
                "public_cv": "",
            }

            with open("train_result.csv", 'a+') as write_obj:
                dict_writer = DictWriter(write_obj,
                                         fieldnames=list(info.keys()))
                dict_writer.writerow(info)

        break
Example 15
def main():
    args = get_args()
    input_path = args.input
    batch_size = args.batch_size
    nb_epochs = args.nb_epochs
    max_age = args.max_age + 1
    depth = args.depth
    k = args.width
    transfer_learning = args.transfer_learning
    validation_split = args.validation_split
    use_augmentation = args.aug
    initial_weights = '/home/paula/THINKSMARTER_/Model/demographics-model-prediction/pretrained_models/weights.18-4.06.hdf5'
    # weight_file = '/home/paula/THINKSMARTER_/Model/age-gender-estimation-adapted/checkpoints/weights.09-4.32.hdf5'

    _weight_decay = 0.0005
    _use_bias = False
    _weight_init = "he_normal"

    logging.debug("Loading data...")
    image, gender, age, _, image_size, _ = load_data(input_path)
    X_data = image
    y_data_g = np_utils.to_categorical(gender, 2)
    y_data_a = np_utils.to_categorical(age, max_age)

    if transfer_learning:

        model = WideResNet(image_size, depth=depth, k=k, units_age=101)()
        model.load_weights(initial_weights)

        inputs = model.input
        flatten = model.layers[-3].output  # flatten layer
        dense1 = Dense(units=2,
                       kernel_initializer=_weight_init,
                       use_bias=_use_bias,
                       kernel_regularizer=l2(_weight_decay),
                       activation="softmax")(flatten)
        dense2 = Dense(units=117,
                       kernel_initializer=_weight_init,
                       use_bias=_use_bias,
                       kernel_regularizer=l2(_weight_decay),
                       activation="softmax")(flatten)
        model = Model(inputs=inputs, outputs=[dense1, dense2])

        # ---------------------------------
        # IDEA: fine-tuning (train only the last two layers)
        # for layer in model.layers[:-2]:
        #     layer.trainable = False

    else:
        model = WideResNet(image_size, depth=depth, k=k, units_age=max_age)()

    sgd = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(
        optimizer=sgd,
        loss=["categorical_crossentropy", "categorical_crossentropy"],
        metrics=['accuracy'])

    logging.debug("Model summary...")
    model.count_params()
    model.summary()

    if args.plot_model:
        plot_model(model,
                   to_file='experiments_pictures/model_plot.png',
                   show_shapes=True,
                   show_layer_names=True)

    logging.debug("Saving model...")
    mk_dir("models")
    with open(os.path.join("models", "WRN_{}_{}.json".format(depth, k)),
              "w") as f:
        f.write(model.to_json())

    mk_dir("checkpoints")
    # tensorBoard = TensorBoard(log_dir='events', histogram_freq=0, batch_size=32, write_graph=True, write_grads=False, write_images=True, embeddings_freq=0, embeddings_layer_names=None, embeddings_metadata=None, embeddings_data=None)

    callbacks = [
        LearningRateScheduler(schedule=Schedule(nb_epochs)),
        ModelCheckpoint("checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                        monitor="val_loss",
                        verbose=1,
                        save_best_only=True,
                        mode="auto")
    ]

    logging.debug("Running training...")

    data_num = len(X_data)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    X_data = X_data[indexes]
    y_data_g = y_data_g[indexes]
    y_data_a = y_data_a[indexes]
    train_num = int(data_num * (1 - validation_split))
    X_train = X_data[:train_num]
    X_test = X_data[train_num:]
    y_train_g = y_data_g[:train_num]
    y_test_g = y_data_g[train_num:]
    y_train_a = y_data_a[:train_num]
    y_test_a = y_data_a[train_num:]

    if use_augmentation:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True,
                                     preprocessing_function=get_random_eraser(
                                         v_l=0, v_h=255))
        training_generator = MixupGenerator(X_train, [y_train_g, y_train_a],
                                            batch_size=batch_size,
                                            alpha=0.2,
                                            datagen=datagen)()

        hist = model.fit_generator(generator=training_generator,
                                   steps_per_epoch=train_num // batch_size,
                                   validation_data=(X_test,
                                                    [y_test_g, y_test_a]),
                                   epochs=nb_epochs,
                                   verbose=1,
                                   callbacks=callbacks)
    else:
        hist = model.fit(X_train, [y_train_g, y_train_a],
                         batch_size=batch_size,
                         epochs=nb_epochs,
                         callbacks=callbacks,
                         validation_data=(X_test, [y_test_g, y_test_a]))

    logging.debug("Saving weights...")
    model.save_weights(os.path.join("models", "WRN_{}_{}.h5".format(depth, k)),
                       overwrite=True)
    pd.DataFrame(hist.history).to_hdf(
        os.path.join("models", "history_{}_{}.h5".format(depth, k)), "history")

    with open('history_tmp.txt', 'w') as f:
        for key in hist.history:
            print(key, file=f)
        f.write('\n')
        json.dump(hist.history, f)
Example 16
def train():
    mnist = read_data_sets('./data/fashion', reshape=False, validation_size=0,
                           source_url='http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/')
    x_train = mnist.train.images
    y_train = mnist.train.labels
    x_test = mnist.test.images
    y_test = mnist.test.labels
    y_train = to_categorical(y_train, 10)
    y_test = to_categorical(y_test, 10)
    x_train = x_train.reshape((-1, 28, 28, 1))
    x_test = x_test.reshape((-1, 28, 28, 1))

    main_input = Input((28, 28, 1))
    aux_input = Input((10,))

    final_output, side_output = build_model(inputs=main_input,labels=aux_input, n=16, k=8,dropout=0.2)
    model = Model(inputs=[main_input, aux_input], outputs=[final_output, side_output])
    model.summary()

    # optim = optimizers.SGD(lr=initial_learning_rate, momentum=0.9)
    optim = optimizers.Adam(lr=3e-4)
    model.compile(optimizer=optim,
                  loss={'main_out': losses.categorical_crossentropy,
                        'centerlosslayer': zero_loss},
                  loss_weights=[1, 0.01],
                  metrics=['accuracy'])


    train_data = ImageDataGenerator(featurewise_center=True,
                                    featurewise_std_normalization=True,
                                    # preprocessing_function=random_crop_image,
                                    preprocessing_function=get_random_eraser(v_l=0, v_h=1),
                                    rotation_range=10,
                                    width_shift_range=5. / 28,
                                    height_shift_range=5. / 28,
                                    horizontal_flip=True)
    validation_data = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)

    for data in (train_data, validation_data):
        data.fit(x_train)  # in practice, fitting the featurewise statistics on x_validation would not really be possible anyway...

    best_weights_filepath = './model/best_weights.hdf5'
    earlyStopping = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10, verbose=1, mode='auto')
    saveBestModel = keras.callbacks.ModelCheckpoint(best_weights_filepath, monitor='val_loss', verbose=1,
                                               save_best_only=True, mode='auto')

    lr_reducer = keras.callbacks.ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1),  # reduce the learning rate when the monitored metric stops improving
                                   cooldown=0, patience=10, min_lr=1e-6)


    dummy = np.zeros((batch_size, 1))
    def gen_flow_for_two_inputs(gen,X1, y, dummy):
        genX1 = gen.flow(X1, y, batch_size=batch_size, seed=666)
        # genX2 = gen.flow(X1, y1, batch_size=batch_size, seed=666)
        while True:
            X1i = genX1.__next__()
            # X2i = genX2.__next__()
            yield [X1i[0], X1i[1]], [X1i[1],dummy]

    hist = model.fit_generator(gen_flow_for_two_inputs(train_data, x_train, y_train,dummy),
                               # batch_size=batch_size,
                               steps_per_epoch=int(50000 / batch_size),
                               epochs=epochs,
                               verbose=1,
                               validation_data=gen_flow_for_two_inputs(validation_data,x_test, y_test,dummy),
                               validation_steps=int(10000 / batch_size),
                               callbacks=[earlyStopping,saveBestModel,lr_reducer]
                               # validation_data=([x_test, y_test_onehot], [y_test_onehot, dummy2])
                               )
Example 17
def main():
    args = get_args()
    input_path = args.input
    batch_size = args.batch_size
    nb_epochs = args.nb_epochs
    depth = args.depth
    k = args.width
    validation_split = args.validation_split
    use_augmentation = args.aug
    output_path = Path(__file__).resolve().parent.joinpath(args.output_path)
    output_path.mkdir(parents=True, exist_ok=True)

    logging.debug("Loading data...")
    image, gender, age, _, image_size, _ = load_data(input_path)
    X_data = image
    y_data_g = np_utils.to_categorical(gender, 2)
    y_data_a = np_utils.to_categorical(age, 101)

    model = WideResNet(image_size, depth=depth, k=k)()
    sgd = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(
        optimizer=sgd,
        loss=["categorical_crossentropy", "categorical_crossentropy"],
        metrics=['accuracy'])

    logging.debug("Model summary...")
    model.count_params()
    model.summary()

    callbacks = [
        LearningRateScheduler(schedule=Schedule(nb_epochs)),
        ModelCheckpoint(str(output_path) +
                        "/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                        monitor="val_loss",
                        verbose=1,
                        save_best_only=True,
                        mode="auto")
    ]

    logging.debug("Running training...")

    data_num = len(X_data)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    X_data = X_data[indexes]
    y_data_g = y_data_g[indexes]
    y_data_a = y_data_a[indexes]
    train_num = int(data_num * (1 - validation_split))
    X_train = X_data[:train_num]
    X_test = X_data[train_num:]
    y_train_g = y_data_g[:train_num]
    y_test_g = y_data_g[train_num:]
    y_train_a = y_data_a[:train_num]
    y_test_a = y_data_a[train_num:]

    if use_augmentation:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True,
                                     preprocessing_function=get_random_eraser(
                                         v_l=0, v_h=255))
        training_generator = MixupGenerator(X_train, [y_train_g, y_train_a],
                                            batch_size=batch_size,
                                            alpha=0.2,
                                            datagen=datagen)()
        hist = model.fit_generator(generator=training_generator,
                                   steps_per_epoch=train_num // batch_size,
                                   validation_data=(X_test,
                                                    [y_test_g, y_test_a]),
                                   epochs=nb_epochs,
                                   verbose=1,
                                   callbacks=callbacks)
    else:
        hist = model.fit(X_train, [y_train_g, y_train_a],
                         batch_size=batch_size,
                         epochs=nb_epochs,
                         callbacks=callbacks,
                         validation_data=(X_test, [y_test_g, y_test_a]))

    logging.debug("Saving history...")
    pd.DataFrame(hist.history).to_hdf(
        output_path.joinpath("history_{}_{}.h5".format(depth, k)), "history")
Example 18
 def eraser(self, image_array):
     return get_random_eraser(v_l=0, v_h=255)(image_array)
    datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images
        vertical_flip=False,  # randomly flip images
        preprocessing_function=get_random_eraser(v_l=0,
                                                 v_h=1,
                                                 pixel_level=pixel_level))

    # Compute quantities required for featurewise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
    datagen.fit(x_train)

    # Fit the model on the batches generated by datagen.flow().
    model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),
                        steps_per_epoch=x_train.shape[0] // batch_size,
                        validation_data=(x_test, y_test),
                        epochs=epochs,
                        verbose=1,
                        workers=4,
                        callbacks=callbacks)
Example 20
import sys

import numpy as np
from uuid import uuid1
import os
from custom_charset import markReplaceDict
from keys import alphabet as charset
import random
import time
from random_eraser import get_random_eraser
reload(sys)
sys.setdefaultencoding('utf8')

erase = get_random_eraser(p=1,
                          s_l=0.01,
                          s_h=0.02,
                          r_1=0.3,
                          r_2=1 / 0.3,
                          v_l=0,
                          v_h=255,
                          pixel_level=False)


def getRandomDateList(num=20):
    a1 = (2000, 1, 1, 0, 0, 0, 0, 0, 0)  # start datetime tuple (2000-01-01 00:00:00)
    a2 = (2050, 12, 30, 23, 59, 59, 0, 0, 0)  # end datetime tuple (2050-12-31 23:59:59)

    start = time.mktime(a1)  # start timestamp
    end = time.mktime(a2)  # end timestamp
    ret = []
    # randomly generate num date strings
    for i in range(num):
        t = random.randint(start, end)  # pick a random timestamp between start and end
Example 21
def main():
    args = get_args()
    input_path = args.input
    batch_size = args.batch_size
    nb_epochs = args.nb_epochs
    depth = args.depth
    k = args.width
    validation_split = args.validation_split
    use_augmentation = args.aug

    logging.debug("Loading data...")
    image, gender, age, _, image_size, _ = load_data(input_path)
    X_data = image
    y_data_g = np_utils.to_categorical(gender, 2)
    y_data_a = np_utils.to_categorical(age, 101)

    #custom parameters
    nb_class = 2
    hidden_dim = 512

    vgg_model = VGGFace(include_top=False, input_shape=(224, 224, 3))
    last_layer = vgg_model.get_layer('pool5').output
    x = Flatten(name='flatten')(last_layer)
    x = Dense(hidden_dim, activation='relu', name='fc6')(x)
    x = Dense(hidden_dim, activation='relu', name='fc7')(x)
    out = Dense(nb_class, activation='softmax', name='fc8')(x)
    model = Model(vgg_model.input, out)
    sgd = SGD(lr=0.1, momentum=0.9, nesterov=True)
    model.compile(optimizer=sgd,
                  loss=["categorical_crossentropy"],
                  metrics=['accuracy'])

    logging.debug("Model summary...")
    model.count_params()
    model.summary()

    logging.debug("Saving model...")
    mk_dir("models")
    with open(os.path.join("models", "vgg_{}_{}.json".format(depth, k)),
              "w") as f:
        f.write(model.to_json())

    mk_dir("checkpoints")
    callbacks = [
        LearningRateScheduler(schedule=Schedule(nb_epochs)),
        ModelCheckpoint("checkpoints/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                        monitor="val_loss",
                        verbose=1,
                        save_best_only=True,
                        mode="auto")
    ]

    logging.debug("Running training...")

    data_num = len(X_data)
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    X_data = X_data[indexes]
    y_data_g = y_data_g[indexes]
    y_data_a = y_data_a[indexes]
    train_num = int(data_num * (1 - validation_split))
    X_train = X_data[:train_num]
    X_test = X_data[train_num:]
    y_train_g = y_data_g[:train_num]
    y_test_g = y_data_g[train_num:]
    y_train_a = y_data_a[:train_num]
    y_test_a = y_data_a[train_num:]

    if use_augmentation:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True,
                                     preprocessing_function=get_random_eraser(
                                         v_l=0, v_h=255))
        training_generator = MixupGenerator(X_train, [y_train_g, y_train_a],
                                            batch_size=batch_size,
                                            alpha=0.2,
                                            datagen=datagen)()
        hist = model.fit_generator(generator=training_generator,
                                   steps_per_epoch=train_num // batch_size,
                                   validation_data=(X_test,
                                                    [y_test_g, y_test_a]),
                                   epochs=nb_epochs,
                                   verbose=1,
                                   callbacks=callbacks)
    else:
        hist = model.fit(X_train, [y_train_g, y_train_a],
                         batch_size=batch_size,
                         epochs=nb_epochs,
                         callbacks=callbacks,
                         validation_data=(X_test, [y_test_g, y_test_a]))

    logging.debug("Saving weights...")
    model.save_weights(os.path.join("models", "vgg_{}_{}.h5".format(depth, k)),
                       overwrite=True)
    pd.DataFrame(hist.history).to_hdf(
        os.path.join("models", "history_{}_{}.h5".format(depth, k)), "history")
Example 22
def main():
    logging.debug("Reading Configuration...")
    args = get_args()    
    Config = configparser.ConfigParser()
    Config.read(args.config_path)
    def ConfigSectionMap(section):
        dict1 = {}
        options = Config.options(section)
        for option in options:
            try:
                dict1[option] = Config.get(section, option)
                if dict1[option] == -1:
                    DebugPrint("skip: %s" % option)
            except:
                print("exception on %s!" % option)
                dict1[option] = None
        return dict1

    #Loading Fixed parameters
    input_path          = ConfigSectionMap("Fixed")['input_path']
    batch_size          = int(ConfigSectionMap("Fixed")['batch_size'])
    nb_epochs           = int(ConfigSectionMap("Fixed")['nb_epochs'])
    validation_split    = float(ConfigSectionMap("Fixed")['validation_split'])
    dim                 = int(ConfigSectionMap("Fixed")['dimension'])
    use_augmentation    = bool(ConfigSectionMap("Fixed")['use_augmentation'])
    metrics_type        = ConfigSectionMap("Fixed")['metrics']
    history             = ConfigSectionMap("Fixed")['history_save_path']
    checkpoints         = ConfigSectionMap("Fixed")['checkpoint_save_path']
    logs_dir            = ConfigSectionMap("Fixed")['log_save_path']

    #Loading parameters that vary over different configurations
    config_number       = args.config_number
    distribution        = ConfigSectionMap(config_number)['distribution']
    feature_extractor   = ConfigSectionMap(config_number)['feature_extractor']
    sigma               = float(ConfigSectionMap(config_number)['sigma'])
    optimizer_type      = ConfigSectionMap(config_number)['optimizer']
    loss_type           = ConfigSectionMap(config_number)['loss']


    logging.debug("Loading data...")
    image, _, age, _, image_size, _ = load_data(input_path)
    X_data = image

    #Alter age according to distribution type
    if distribution == "GaussBins":    
        age = age[:,np.newaxis]
        lin_y = np.linspace(0,100,dim)[:,np.newaxis]
        y_data_a = (1/(sigma*np.sqrt(2*np.pi)))*np.exp(-np.square((age-lin_y.T)/(np.sqrt(2)*sigma)))
    elif distribution == "Cls":
        y_data_a = np_utils.to_categorical(age, dim)
    print(y_data_a.shape)

    data_num = len(X_data)
    #Randomly shuffle data
    indexes = np.arange(data_num)
    np.random.shuffle(indexes)
    X_data = X_data[indexes]
    y_data_a = y_data_a[indexes]
    #Split in test train set
    train_num = int(data_num * (1 - validation_split))
    X_train = X_data[:train_num]
    X_test = X_data[train_num:]
    y_train_a = y_data_a[:train_num]
    y_test_a = y_data_a[train_num:]

    #Choose network
    if feature_extractor == "WideResNet":
        model = WideResNet(image_size, depth=16, k=8)()
    
    #Choose optimizer
    if optimizer_type == "sgd":
        optimizer = SGD(lr=0.1, momentum=0.9, nesterov=True)

    #Choose loss
    if loss_type == "kullback_leibler_divergence":
        loss = "kullback_leibler_divergence"
    elif loss_type == "Wasserstein":
        loss = Wasserstein  
    elif loss_type == "wass1":
        loss = Wass1  
    elif loss_type == "loss1":
        loss = custom_loss1      
    elif loss_type == "loss2":
        loss = custom_loss2     
    elif loss_type == "loss3":
        loss = "mean_squared_error"                        
    elif loss_type == "loss4":
        loss = "mean_absolute_error"                        
    elif loss_type == "categorical_crossentropy":
        loss = "categorical_crossentropy"

    #Choose metric
    if metrics_type == "mean_absolute_error":
        metric = mean_absolute_error

    #Final compilation
    model.compile(optimizer=optimizer, loss=[loss], metrics=[metric])

    logging.debug("Model summary...")
    model.count_params()
    # model.summary()

    #Callbacks
    json_log = open(os.path.join(logs_dir,"{}_{}_{:.5}_{}_{}.log".format(distribution,feature_extractor,loss_type,optimizer_type,sigma)),
                    mode='wt',
                    buffering=1)
    logging_callback = LambdaCallback(
        on_train_begin=lambda logs: json_log.write(
            json.dumps({'distribution': distribution, 'feature_extractor': feature_extractor,
                         'loss_type': loss_type, 'optimizer_type': optimizer_type, 'sigma': sigma}) + '\n'),
        on_epoch_end=lambda epoch, logs: json_log.write(
            json.dumps({'epoch': epoch, 'val_mean_absolute_error': logs['val_mean_absolute_error'], 'val_loss': logs['val_loss'], 'mean_absolute_error': logs['mean_absolute_error'], 'loss': logs['loss']}) + '\n'),
        on_train_end=lambda logs: json_log.close()
    )
    callbacks = [LearningRateScheduler(schedule=Schedule(nb_epochs)),
                 ModelCheckpoint(os.path.join(checkpoints,"weights.{}_{}_{:.5}_{}_{}.hdf5".format(distribution,feature_extractor,loss_type,optimizer_type,sigma)),
                                 monitor="val_mean_absolute_error",
                                 verbose=1,
                                 save_best_only=True,
                                 mode="auto"),
                 logging_callback,
                 TensorBoard(log_dir=os.path.join(logs_dir, "{}_{}_{:.5}_{}_{}/".format(distribution, feature_extractor, loss_type, optimizer_type, sigma)),
                             histogram_freq=0, batch_size=batch_size, write_graph=False,
                             write_grads=False, write_images=False)]


    logging.debug("Running training...")
    if use_augmentation:
        # Mixup between sample pairs, plus shift/flip and per-image random erasing
        datagen = ImageDataGenerator(
            width_shift_range=0.1,
            height_shift_range=0.1,
            horizontal_flip=True,
            preprocessing_function=get_random_eraser(v_l=0, v_h=255))
        training_generator = MixupGenerator(X_train, y_train_a, batch_size=batch_size, alpha=0.2,
                                            datagen=datagen)()
        hist = model.fit_generator(generator=training_generator,
                                   steps_per_epoch=train_num // batch_size,
                                   validation_data=(X_test, y_test_a),
                                   epochs=nb_epochs, verbose=1,
                                   callbacks=callbacks)
    else:
        hist = model.fit(X_train, y_train_a, batch_size=batch_size, epochs=nb_epochs, verbose=1, callbacks=callbacks,
                         validation_data=(X_test, y_test_a))

    logging.debug("Saving history and graphs...")
    pd.DataFrame(hist.history).to_hdf(os.path.join(history, "history.{}_{}_{:.5}_{}_{}.hdf5".format(distribution,feature_extractor,loss_type,optimizer_type,sigma)), "history")
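    # For later inspection, the history saved above can be re-loaded with pandas, e.g.
    # pd.read_hdf(<path written above>, "history")[["loss", "val_loss"]].tail()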
Esempio n. 23
0
shutil.copyfile(pretrained_path, model_final)
parser = argparse.ArgumentParser()
# parser.add_argument('dataset_root')
# parser.add_argument('classes')
# parser.add_argument('result_root')
parser.add_argument('--epochs_pre', type=int, default=2)
parser.add_argument('--epochs_fine', type=int, default=2)
parser.add_argument('--batch_size_pre', type=int, default=16)
parser.add_argument('--batch_size_fine', type=int, default=16)
parser.add_argument('--lr_pre', type=float, default=1e-3)
parser.add_argument('--lr_fine', type=float, default=1e-4)
parser.add_argument('--snapshot_period_pre', type=int, default=1)
parser.add_argument('--snapshot_period_fine', type=int, default=1)
# parser.add_argument('--split', type=float, default=0.8)

eraser = get_random_eraser()


def categorical_focal_loss(gamma=2., alpha=.25):
    """
    Softmax version of focal loss.
           m
      FL = ∑  -alpha * (1 - p_o,c)^gamma * y_o,c * log(p_o,c)
          c=1
      where m = number of classes, c = class and o = observation
    Parameters:
      alpha -- the same as weighing factor in balanced cross entropy
      gamma -- focusing parameter for modulating factor (1-p)
    Default value:
      gamma -- 2.0 as mentioned in the paper
      alpha -- 0.25 as mentioned in the paper
    """
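    # The function body is cut off in this excerpt. The lines below are a minimal,
    # assumed completion in the usual Keras style (not taken from the original source);
    # they assume `from keras import backend as K`.
    def categorical_focal_loss_fixed(y_true, y_pred):
        # Clip predictions so log() never sees exactly 0 or 1
        eps = K.epsilon()
        y_pred = K.clip(y_pred, eps, 1. - eps)
        # Per-class cross-entropy weighted by the focal modulating factor (1 - p)^gamma
        cross_entropy = -y_true * K.log(y_pred)
        loss = alpha * K.pow(1. - y_pred, gamma) * cross_entropy
        # Sum over classes, average over the batch
        return K.mean(K.sum(loss, axis=-1))

    return categorical_focal_loss_fixed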
Esempio n. 24
0
for img in datagen.flow(x_train[1:2], batch_size=1):
    imgs.append(img[0])
    if len(imgs) >= max_img_num: 
      break
show_imgs(imgs)

!git clone https://github.com/yu4u/cutout-random-erasing ./random_eraser 


import sys
sys.path.append("/content/random_eraser/") 

from random_eraser import get_random_eraser

datagen = ImageDataGenerator(
    preprocessing_function=get_random_eraser(p=0.5, s_l=0.02, s_h=0.2, r_1=0.3, r_2=1/0.3, v_l=0, v_h=0))

max_img_num = 12
imgs = []
for img in datagen.flow(x_train[1:2], batch_size=1):
    imgs.append(img[0])

    if len(imgs) >= max_img_num: 
      break
show_imgs(imgs)
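
The cloned repository provides get_random_eraser, whose implementation is not reproduced in this excerpt. As a rough sketch of what the parameters used above control (probability p of erasing, erased-area fraction in [s_l, s_h], aspect ratio in [r_1, r_2], fill values in [v_l, v_h], and pixel_level for per-pixel random fill), a simplified version might look like the following; see the repository above for the reference implementation.

import numpy as np

def random_eraser_sketch(p=0.5, s_l=0.02, s_h=0.4, r_1=0.3, r_2=1/0.3,
                         v_l=0, v_h=255, pixel_level=False):
    def eraser(input_img):
        img_h, img_w, img_c = input_img.shape
        if np.random.rand() > p:                              # keep the image unchanged with probability 1 - p
            return input_img
        while True:
            s = np.random.uniform(s_l, s_h) * img_h * img_w   # target erased area
            r = np.random.uniform(r_1, r_2)                   # target aspect ratio
            w, h = int(np.sqrt(s / r)), int(np.sqrt(s * r))
            left, top = np.random.randint(0, img_w), np.random.randint(0, img_h)
            if left + w <= img_w and top + h <= img_h:        # retry until the box fits
                break
        if pixel_level:
            fill = np.random.uniform(v_l, v_h, (h, w, img_c)) # independent random value per pixel
        else:
            fill = np.random.uniform(v_l, v_h)                # single random value for the whole box
        input_img[top:top + h, left:left + w] = fill
        return input_img
    return eraser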

Esempio n. 25
0
# Callbacks: LR schedule, best-only checkpointing, TensorBoard
callbacks = [
    LearningRateScheduler(schedule=Schedule(nb_epochs, 0.1)),
    ModelCheckpoint(str(output_path) +
                    "/weights.{epoch:02d}-{val_loss:.2f}.hdf5",
                    monitor="val_loss",
                    verbose=1,
                    save_best_only=True,
                    mode="auto"),
    # NOTE: the log_dir string has no '{}' placeholder, so .format(namecall) has no effect
    TensorBoard(log_dir='TensorBoard/imdb-64'.format(namecall))
]

if aug:
    datagen = ImageDataGenerator(width_shift_range=0.1,
                                 height_shift_range=0.1,
                                 horizontal_flip=True,
                                 preprocessing_function=get_random_eraser(
                                     v_l=0, v_h=255))

    training_generator = MixupGenerator(x_train, [y_train_g, y_train_a],
                                        batch_size=32,
                                        alpha=0.2,
                                        datagen=datagen)()

    hist = model.fit_generator(generator=training_generator,
                               steps_per_epoch=train_num // batch_size,
                               validation_data=(x_test, [y_test_g, y_test_a]),
                               epochs=nb_epochs,
                               verbose=1,
                               callbacks=callbacks)
else:
    # (The call is truncated in this excerpt; the remaining arguments presumably mirror
    #  the augmented branch above: epochs, callbacks and validation_data.)
    hist = model.fit(x_train, [y_train_g, y_train_a],
                     batch_size=32,
                     epochs=nb_epochs,
                     verbose=1,
                     callbacks=callbacks,
                     validation_data=(x_test, [y_test_g, y_test_a]))
Esempio n. 26
0
def train():
    x, y_a, y_g, y_r = load_data()
    print(x.shape)
    print(y_a.shape)
    print(y_g.shape)
    print(y_r.shape)

    train_index = int(len(x) * (1 - test_split))

    x_train = x[:train_index]
    y_train_a = y_a[:train_index]
    y_train_g = y_g[:train_index]
    y_train_r = y_r[:train_index]

    x_test = x[train_index:]
    y_test_a = y_a[train_index:]
    y_test_g = y_g[train_index:]
    y_test_r = y_r[train_index:]

    model = Face(train=True)
    opt = Adam(lr=initial_lr)
    #opt = SGD(lr=initial_lr, momentum=0.9, nesterov=True)
    model.compile(optimizer=opt,
                  loss=[
                      'categorical_crossentropy', 'categorical_crossentropy',
                      'categorical_crossentropy'
                  ],
                  metrics=['accuracy'])

    callbacks = [
        LearningRateScheduler(schedule=Schedule(nb_epochs, initial_lr)),
        ModelCheckpoint(
            weights_output_path + "/face_weights.{epoch:02d}"
            "-val_loss-{val_loss:.2f}-val_age_loss-{val_predications_age_loss:.2f}"
            "-val_gender_loss-{val_predications_gender_loss:.2f}-val_race_loss-{val_predications_race_loss:.2f}.utk.h5",
            monitor="val_loss",
            verbose=1,
            save_best_only=True,
            mode="auto"),
        TensorBoard(log_dir='logs\{0}-{1}'.format(model.name, time.time()),  # backslash separator is Windows-specific
                    histogram_freq=1,
                    batch_size=batch_size,
                    write_graph=True,
                    write_grads=False,
                    write_images=True,
                    embeddings_freq=0,
                    embeddings_layer_names=None,
                    embeddings_metadata=None,
                    embeddings_data=None,
                    update_freq=500)
    ]

    # Train directly, or with mixup + random-erasing augmentation if enabled
    if not data_augmentation:
        history = model.fit(x_train, [y_train_a, y_train_g, y_train_r],
                            batch_size=batch_size,
                            epochs=nb_epochs,
                            callbacks=callbacks,
                            validation_data=(x_test,
                                             [y_test_a, y_test_g, y_test_r]))
    else:
        datagen = ImageDataGenerator(width_shift_range=0.1,
                                     height_shift_range=0.1,
                                     horizontal_flip=True,
                                     preprocessing_function=get_random_eraser(
                                         v_l=0, v_h=255))
        training_generator = MixupGenerator(x_train,
                                            [y_train_a, y_train_g, y_train_r],
                                            batch_size=batch_size,
                                            alpha=0.2,
                                            datagen=datagen)()
        history = model.fit_generator(
            generator=training_generator,
            steps_per_epoch=len(x_train) // batch_size,
            validation_data=(x_test, [y_test_a, y_test_g, y_test_r]),
            epochs=nb_epochs,
            verbose=1,
            callbacks=callbacks)
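
MixupGenerator is used in several of these examples but its definition is not included in any of them. The sketch below is only an assumption of the usual formulation (pairs of samples blended with weights drawn from Beta(alpha, alpha), labels blended with the same weights, and the optional ImageDataGenerator applied per mixed image); it is illustrative, not the original implementation.

import numpy as np

class MixupGeneratorSketch:
    """Illustrative mixup batch generator; not the original MixupGenerator."""

    def __init__(self, x, y, batch_size=32, alpha=0.2, datagen=None):
        self.x, self.y = x, y
        self.batch_size = batch_size
        self.alpha = alpha
        self.datagen = datagen

    def __call__(self):
        n = len(self.x)
        while True:
            idx1 = np.random.randint(0, n, self.batch_size)
            idx2 = np.random.randint(0, n, self.batch_size)
            lam = np.random.beta(self.alpha, self.alpha, self.batch_size)
            # Blend image pairs; lam is broadcast over the remaining axes
            lam_x = lam.reshape([self.batch_size] + [1] * (self.x.ndim - 1))
            x_batch = lam_x * self.x[idx1] + (1 - lam_x) * self.x[idx2]
            if self.datagen is not None:
                # Apply the Keras augmentation pipeline (including the random eraser
                # passed as preprocessing_function) to each mixed image
                x_batch = np.stack([self.datagen.standardize(self.datagen.random_transform(img))
                                    for img in x_batch])
            # Blend labels with the same weights; supports multi-output label lists
            lam_y = lam.reshape(self.batch_size, 1)
            if isinstance(self.y, list):
                y_batch = [lam_y * yy[idx1] + (1 - lam_y) * yy[idx2] for yy in self.y]
            else:
                y_batch = lam_y * self.y[idx1] + (1 - lam_y) * self.y[idx2]
            yield x_batch, y_batch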
Esempio n. 27
0
    checkpoint = ModelCheckpoint(join(refine_path,
                                      'best_semi_%d_{val_acc:.5f}.h5' % i),
                                 monitor='val_acc',
                                 verbose=1,
                                 save_best_only=True)
    early = EarlyStopping(monitor="val_acc", mode="max", patience=60)

    print("#" * 50)
    print("Fold: ", i)

    datagen = ImageDataGenerator(
        featurewise_center=True,  # set input mean to 0 over the dataset (needs datagen.fit(X_train), not shown in this excerpt)
        width_shift_range=0.2,
        height_shift_range=0.2,
        horizontal_flip=True,
        preprocessing_function=get_random_eraser(
            v_l=np.min(X_train), v_h=np.max(X_train))  # Trainset's boundaries.
    )

    mygenerator = MixupGenerator(X_train,
                                 Y_train,
                                 alpha=1.0,
                                 batch_size=128,
                                 datagen=datagen)

    history = model.fit_generator(mygenerator(),
                                  steps_per_epoch=X_train.shape[0] // 128,
                                  epochs=10000,
                                  validation_data=(X_test, Y_test),
                                  callbacks=[checkpoint, early])
Esempio n. 28
0
    def fine_tune(MODEL,
                  model_name,
                  optimizer,
                  lr,
                  epoch,
                  patience,
                  batch_size,
                  weights,
                  X=x_train,
                  test=None):
        # Fine-tune the model

        from random_eraser import get_random_eraser
        datagen = ImageDataGenerator(horizontal_flip=True,
                                     preprocessing_function=get_random_eraser(
                                         v_h=60, pixel_level=True))

        val_datagen = ImageDataGenerator()

        inputs = Input(input_shape)
        x = inputs
        cnn_model = MODEL(include_top=False,
                          input_shape=input_shape,
                          weights=None)
        x = cnn_model(x)
        x = GlobalAveragePooling2D()(x)
        x = Dropout(0.5)(x)
        x = Dense(128, activation='relu', name='sim')(x)
        x = Dropout(0.5)(x)
        x = Dense(n_class, activation='softmax', name='predictions')(x)
        model = Model(inputs=inputs, outputs=x)

        # Load previously fine-tuned weights if available; otherwise train the
        # classifier head on bottleneck features first and start from those weights.
        try:
            model.load_weights(model_name + '.h5')
            print('Load ' + model_name + '.h5 successfully.')
        except Exception:
            if weights == 'imagenet':
                print('Start computing ' + model_name +
                      ' bottleneck feature: ')
                features = get_features(MODEL, X)

                # Training models
                inputs = Input(features.shape[1:])
                x = inputs
                x = Dropout(0.5)(x)
                x = Dense(128, activation='relu', name='sim')(x)
                x = Dropout(0.5)(x)
                x = Dense(n_class, activation='softmax', name='predictions')(x)
                model_fc = Model(inputs, x)
                model_fc.compile(optimizer='adam',
                                 loss='categorical_crossentropy',
                                 metrics=['accuracy'])
                h = model_fc.fit(features,
                                 y_train,
                                 batch_size=128,
                                 epochs=5,
                                 validation_split=0.1)
                model_fc.save('fc_' + model_name + '.h5')
                model.load_weights('fc_' + model_name + '.h5', by_name=True)

        print("Optimizer=" + optimizer + " lr=" + str(lr) + " \n")
        if optimizer == "Adam":
            # NOTE: the string 'adam' uses Keras' default learning rate, so `lr` is ignored here
            model.compile(loss='categorical_crossentropy',
                          optimizer='adam',
                          metrics=['accuracy'])
        elif optimizer == "SGD":
            model.compile(loss='categorical_crossentropy',
                          optimizer=SGD(lr=lr, momentum=0.9, nesterov=True),
                          metrics=['accuracy'])

        if not test:
            datagen.fit(x_train)
            val_datagen.fit(x_val)

            class LossHistory(keras.callbacks.Callback):
                def on_train_begin(self, logs={}):
                    self.losses = []

                def on_epoch_end(self, epoch, logs={}):
                    self.losses.append(
                        (logs.get('loss'), logs.get("val_loss")))

            history = LossHistory()
            early_stopping = EarlyStopping(monitor='val_loss',
                                           patience=patience,
                                           verbose=1,
                                           mode='auto')
            checkpointer = ModelCheckpoint(filepath=model_name + '.h5',
                                           verbose=0,
                                           save_best_only=True)
            reduce_lr = ReduceLROnPlateau(factor=0.5, patience=3, verbose=1)
            if optimizer == "Adam":
                callbacks = [history, early_stopping, checkpointer]
            else:
                callbacks = [history, early_stopping, checkpointer, reduce_lr]
            h = model.fit_generator(datagen.flow(x_train,
                                                 y_train,
                                                 batch_size=batch_size),
                                    steps_per_epoch=len(x_train) / batch_size,
                                    validation_data=val_datagen.flow(
                                        x_val, y_val, batch_size=batch_size),
                                    validation_steps=len(x_val) / batch_size,
                                    epochs=epoch,
                                    callbacks=callbacks)
            return h
        else:
            print('Evaluate on test set')
            val_datagen.fit(x_test)
            score = model.evaluate_generator(
                val_datagen.flow(x_test, y_test, batch_size=batch_size),
                len(x_test) / batch_size)
            print(score)
            return score
Esempio n. 29
0
        X_train, y_train, X_val, y_val = split_data(X, y, int(val_set_num))
        # X_train, X_val = normalize(X_train, X_val)

        X_train = np.concatenate((X_train, X_semi))
        y_train = np.concatenate((y_train, y_semi))

        tmpidx = list(range(len(X_train)))
        random.shuffle(tmpidx)

        X_train = X_train[tmpidx]
        y_train = y_train[tmpidx]

        datagen = ImageDataGenerator(width_shift_range=0.05,
                                     height_shift_range=0.05,
                                     shear_range=0.084375,
                                     preprocessing_function=get_random_eraser(
                                         v_l=np.min(X_train), v_h=np.max(X_train)))
        
        training_generator = MixupGenerator(X_train, y_train, batch_size=32, alpha=0.5, datagen=datagen)()

        filename = os.path.join(base_path, 'phase3_mfcc4_resnet18_3/model' + val_set_num)
        if not os.path.exists(filename):
            os.makedirs(filename)

        callback = ModelCheckpoint(filename + '/weights.{epoch:04d}-{val_loss:.4f}-{val_acc:.4f}.h5', monitor='val_acc', save_best_only=True, verbose=1)
        early = EarlyStopping(monitor='val_acc', mode='max', patience=30, verbose=1)

        # model = resnet.ResnetBuilder.build_resnet_152((1, 20, 690), 41)
        modelfile = os.path.join(base_path, 'cnn_model_18/model' + val_set_num)
        model = load_model(modelfile)

        # model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])