Example #1
def train_binary_model(base_model, model_name, already_trained_model=None):
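    """Train a binary classifier from scratch or resume training a previously saved model."""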
    x_train, y_train, x_test, y_test = prepare_data()
    if not already_trained_model:
        model = StandardModel(base_model, (512, 512, 3),
                              classes=2,
                              use_softmax=True)
        model = model.build_model()
        model.compile(Adamax(), loss='binary_crossentropy', metrics=['acc'])
        model.fit_generator(DataGenerator(x_train,
                                          labels=y_train,
                                          n_classes=2,
                                          batch_size=8),
                            epochs=1)
        model.save(model_name)
    else:
        if os.path.exists(already_trained_model):
            model = keras.models.load_model(already_trained_model)
            model.compile(Adamax(),
                          loss='binary_crossentropy',
                          metrics=['acc'])
            model.fit_generator(DataGenerator(x_train,
                                              labels=y_train,
                                              n_classes=2,
                                              batch_size=8),
                                epochs=1)
            model.save(model_name)
        else:
            print_error("Provided model file doesn't exist! Exiting...")
            sys.exit(1)
Example #2
def train_model(model, partitions, train_labels, train_path, parameter,
                preprocessor):
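    """Train on each data partition and collect per-partition validation predictions and histories."""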
    target_names = train_labels.drop(["Target", "Id"], axis=1).columns

    predictions = []
    histories = []

    for i, partition in enumerate(partitions):
        print("training in partition ", i + 1)

        training_generator = DataGenerator(partition['train'], train_labels,
                                           parameter, preprocessor)
        validation_generator = DataGenerator(partition['validation'],
                                             train_labels, parameter,
                                             preprocessor)
        predict_generator = PredictGenerator(partition['validation'],
                                             preprocessor, train_path)

        model.set_generators(training_generator, validation_generator)
        histories.append(model.learn())

        proba_predictions = model.predict(predict_generator)
        proba_predictions = pd.DataFrame(index=partition['validation'],
                                         data=proba_predictions,
                                         columns=target_names)

        predictions.append(proba_predictions)

    return predictions, histories
Example #3
def train_model(use_multiprocessing=False):
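    """Train an LSTM on the IMDB data, log to TensorBoard, evaluate the splits, and save a versioned model."""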
    date = datetime.datetime.now()
    model_filename = "{0}-{1}-{2}_lstm_model".format(
        date.month, date.day, date.year)

    train_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, 100)
    val_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, "val", 100)
    test_generator = DataGenerator(IMDB_DATA_PATH, IMDB_DATA_PATH, "test", 100)
    model = get_model(INPUT_SHAPE)

    # Creates a directory to save tensorboard callback logs.
    tensorboard_path_dir = BASE_PATH/"logs"/model_filename
    if not tensorboard_path_dir.exists():
        tensorboard_path_dir.mkdir(exist_ok=True, parents=True)

    tbCallBack = keras.callbacks.TensorBoard(
        log_dir=str(tensorboard_path_dir),
        histogram_freq=10,
        batch_size=32,
        write_graph=True,
        write_grads=False,
        write_images=False,
        embeddings_freq=0,
        embeddings_layer_names=None,
        embeddings_metadata=None
    )

    model.compile(loss='binary_crossentropy', optimizer='adam',
                  metrics=[f1, metrics.binary_accuracy])
    model.fit_generator(
        generator=train_generator,
        epochs=100,
        callbacks=[tbCallBack],
        validation_data=val_generator,
        validation_freq=10,
        workers=2,
        use_multiprocessing=True,
        shuffle=True,
    )

    score_test = model.evaluate_generator(
        test_generator, use_multiprocessing=use_multiprocessing, verbose=0)
    score_val = model.evaluate_generator(
        val_generator, use_multiprocessing=use_multiprocessing, verbose=0)
    score_train = model.evaluate_generator(
        train_generator, use_multiprocessing=use_multiprocessing, verbose=0)
    # needs to be fixed, violates abstraction barriers
    model_dict = load_model_dict()
    # Next model version number: 1 if nothing has been saved yet, otherwise the last version + 1
    vn = 0
    if len(model_dict) == 0:
        vn = 1
    else:
        vn = list(model_dict.keys())[-1] + 1
    save_model(model, model_filename, vn)
Example #4
    def __init__(self,
                 n_jobs=-1,
                 n_epochs=5,
                 batch_size=1,
                 chunk_size=32,
                 learning_rate=.001,
                 generator=None,
                 **kwargs):
        super(HogWildRegressor, self).__init__(**kwargs)

        if self.loss not in self.losses:
            raise Exception("Loss '%s' not supported" % self.loss)

        self.batch_size = batch_size
        self.learning_rate = learning_rate
        self.gradient = self.losses.get(self.loss)
        self.n_jobs = n_jobs
        self.n_epochs = n_epochs
        self.chunk_size = chunk_size
        self.shared_weights = SharedWeights

        if not generator:
            self.generator = DataGenerator(shuffle=self.shuffle,
                                           chunk_size=self.chunk_size,
                                           verbose=self.verbose)
        else:
            self.generator = generator
Example #5
n_epochs = 100
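# SGD with Nesterov momentum; decay spreads the learning-rate reduction evenly over the planned epochs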
sgd = optimizers.SGD(lr=LearningRate, decay=LearningRate / n_epochs,
                     momentum=0.9, nesterov=True)
top_model.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
trainable_params = int(np.sum([K.count_params(p) for p in set(top_model.trainable_weights)]))
non_trainable_params = int(np.sum([K.count_params(p) for p in set(top_model.non_trainable_weights)]))
print("Model Stats")
print("=" * 30)
print("Total Parameters: {:,}".format(trainable_params + non_trainable_params))
print("Non-Trainable Parameters: {:,}".format(non_trainable_params))
print("Trainable Parameters: {:,}\n".format(trainable_params))
train_folders = '/cptjack/totem/yanyiting/Eight_classification/gray/8_focus/train'
validation_folders = '/cptjack/totem/yanyiting/Eight_classification/gray/8_focus/val/'

img_width,img_height = 224,224
batch_size_for_generators = 32
train_datagen = DataGenerator(rescale=1. / 255,
                              rotation_range=178,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              fill_mode='nearest',
                              stain_transformation=True)
train_generator = train_datagen.flow_from_directory(train_folders,
                                                    target_size=(img_width, img_height),
                                                    batch_size=32,
                                                    class_mode='categorical')
validation_datagen = DataGenerator(rescale=1. / 255)
validation_generator = validation_datagen.flow_from_directory(validation_folders,
                                                              target_size=(img_width, img_height),
                                                              batch_size=32,
                                                              class_mode='categorical')

nb_train_samples = sum([len(files) for root, dirs, files in os.walk(train_folders)])
nb_validation_samples = sum([len(files) for root, dirs, files in os.walk(validation_folders)])




class Mycbk(ModelCheckpoint):
    def __init__(self, model, filepath, monitor='val_loss', mode='min', save_best_only=True):
        self.single_model = model
        super(Mycbk, self).__init__(filepath, monitor=monitor,
                                    save_best_only=save_best_only, mode=mode)
Example #6
def train(nb_batch=32,
          nb_epochs=100,
          l_rate=1e-4,
          augmented=False,
          multi_gpu=0):
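    """Train the Squeeze_model on the HDF5 data, optionally with augmentation and multiple GPUs, then report R2 scores."""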

    # Load the data
    h5f = h5py.File(os.path.join(data_dir, "data.h5"), 'r')
    train_x, train_y = h5f['train_x'][:], h5f['train_y'][:]
    valid_x, valid_y = h5f['valid_x'][:], h5f['valid_y'][:]
    test_x, test_y = h5f['test_x'][:], h5f['test_y'][:]
    h5f.close()

    print("Data shapes: ", train_x.shape, valid_x.shape, test_x.shape)

    # Training parameters
    if multi_gpu:
        nb_batch = multi_gpu * nb_batch  # each GPU gets nb_batch samples, so scale the total batch size

    # Build the model
    model_input = Input(shape=(24, 24, 24, 16))
    model = Model(inputs=model_input, outputs=Squeeze_model(model_input))

    if multi_gpu: model = multi_gpu_model(model, gpus=multi_gpu)

    # Compile the model
    model.compile(optimizer=optimizers.Adam(lr=l_rate,
                                            beta_1=0.99,
                                            beta_2=0.999),
                  loss='mean_squared_error')

    # checkpoint
    outputFolder = "weights"
    if not os.path.isdir(outputFolder): os.makedirs(outputFolder)
    weights_filepath = os.path.join(outputFolder, "weights.h5")
    callbacks_list = [
        ModelCheckpoint(weights_filepath,
                        monitor='val_loss',
                        verbose=1,
                        save_best_only=True,
                        save_weights_only=True,
                        mode='auto',
                        period=1)
    ]

    # Train without generators
    # history = model.fit(x=train_x, y=train_y,
    #                     batch_size=nb_batch,
    #                     epochs=nb_epochs,
    #                     callbacks=callbacks_list,
    #                     validation_data=(valid_x, valid_y),
    #                     verbose=True)

    # Generators
    if augmented:
        print("TRAINING ON AUGMENTED DATA")
        data_gen = AugmentedDataGenerator(x=train_x,
                                          y=train_y,
                                          batch_size=nb_batch)
        val_gen = AugmentedDataGenerator(x=valid_x,
                                         y=valid_y,
                                         batch_size=nb_batch)
    else:
        data_gen = DataGenerator(x=train_x, y=train_y, batch_size=nb_batch)
        val_gen = DataGenerator(x=valid_x, y=valid_y, batch_size=nb_batch)

    # Train
    history = model.fit_generator(generator=data_gen,
                                  validation_data=val_gen,
                                  use_multiprocessing=False,
                                  epochs=nb_epochs,
                                  max_queue_size=10,
                                  workers=56,
                                  verbose=1,
                                  callbacks=callbacks_list)

    # Plot training history
    #plt.figure()
    #plt.plot(history['loss'])
    #plt.plot(history['val_loss'])
    #plt.xlabel("Epochs")
    #plt.ylabel("Loss (MSE)")
    #plt.legend(['Train Loss', 'Validation Loss'])
    #plt.savefig('training_history.png', format='png', dpi=1000)
    #plt.show()

    # Load the best weights
    model.load_weights(weights_filepath)

    # Evaluate the model's performance
    train_r2 = r2_score(y_true=train_y, y_pred=model.predict(train_x))
    print("Train r2: ", train_r2)

    test_r2 = r2_score(y_true=test_y, y_pred=model.predict(test_x))
    print("Test r2: ", test_r2)
Example #7
train_params = {
    'input_dir': train_folder,
    'samples': train_n,
    'label_dict': data,
    'image_shape': input_shape,
    'batch_size': batch_size,
    'augment': True,
    'shuffle': True
}

dev_params = {
    'input_dir': train_folder,
    'samples': dev_n,
    'label_dict': data,
    'image_shape': input_shape,
    'batch_size': batch_size,
    'augment': False,
    'shuffle': True
}

# create generators
train_generator = DataGenerator(**train_params)
validation_generator = DataGenerator(**dev_params)

# do the training
train_model(model,
            train_generator,
            validation_generator,
            epochs=num_epochs,
            use_multiprocessing=True,
            workers=4)
Example #8
np.seterr(all='raise')

#plt.close("all")

if True:
    # Load data
    print('Loading data...')
    data = load_data.data()
    cfg = data.cfg
    labels = data.labels
    class_mapping = data.class_mapping

    # Create batch generators
    genTrain = DataGenerator(imagesMeta=data.trainMeta,
                             GTMeta=data.trainGTMeta,
                             cfg=cfg,
                             data_type='train')
    genVal = DataGenerator(imagesMeta=data.valMeta,
                           GTMeta=data.trainGTMeta,
                           cfg=cfg,
                           data_type='val')
    genTest = DataGenerator(imagesMeta=data.testMeta,
                            GTMeta=data.testGTMeta,
                            cfg=cfg,
                            data_type='test')

    stats, count = utils.getLabelStats(data.trainMeta, data.labels)

if True:
    # Save config
    utils.saveConfig(cfg)
Example #9
valid_labels = np.array(valid_labels).astype(np.int32)
Y_valid = to_categorical(valid_labels,
                         num_classes=np.unique(valid_labels).shape[0])

print("\nBootstrapping to Balance - Training set size: %d (%d X %d)" %
      (train_labels.shape[0], MAX, np.unique(train_labels).shape[0]))
print("=" * 30, "\n")

#n_epochs = 70

batch_size_for_generators = 32

train_datagen = DataGenerator(rotation_range=178,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              stain_transformation=True)

train_gen = train_datagen.flow(train_images,
                               Y_train,
                               batch_size=batch_size_for_generators)

### VALIDATION ###

valid_datagen = DataGenerator()

valid_gen = valid_datagen.flow(valid_images,
                               Y_valid,
                               batch_size=batch_size_for_generators)
start = time.time()
Example #10
early_stopping = EarlyStopping(monitor='val_acc', min_delta=0.001, patience=9)

checkpointer = ModelCheckpoint(filepath='./models/model_{}'.format(1),
                               verbose=True,
                               save_best_only=True)

# tb = TensorBoard(log_dir='./tf_logs/{}'.format(time.time()),
#                  batch_size=BATCH_SIZE,
#                  write_grads=True,
#                  write_graph=True,
#                  histogram_freq=1)

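# Batch generators for training and validation, built from the shared tensor maker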
DG = DataGenerator(data=train['data'],
                   batch_size=BATCH_SIZE,
                   tensor_maker=TM,
                   shuffle=True,
                   sentences=True,
                   characters=False,
                   word_features=False,
                   tags=True)
VG = DataGenerator(data=valid['data'],
                   batch_size=BATCH_SIZE,
                   tensor_maker=TM,
                   shuffle=True,
                   sentences=True,
                   characters=False,
                   word_features=False,
                   tags=True)

model.fit_generator(generator=DG,
                    validation_data=VG,
                    validation_steps=len(VG),
Example #11
import numpy as np
from sklearn import metrics

#test_folders = "./NCT/validation/"
test_folders = '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/micro_png_224'

batch_size_for_generators = 32
nb_test_samples = sum(
    [len(files) for root, dirs, files in os.walk(test_folders)])
test_steps = nb_test_samples // batch_size_for_generators

print("\nImages for Testing")
print("=" * 30)

img_width, img_height = 224, 224
test_datagen = DataGenerator(rescale=1. / 255)
test_generator = test_datagen.flow_from_directory(test_folders,
                                                  target_size=(img_width,
                                                               img_height),
                                                  batch_size=32,
                                                  class_mode='categorical',
                                                  shuffle=False)

test_loss, test_accuracy = top_model.evaluate_generator(test_generator,
                                                        steps=test_steps,
                                                        verbose=1)

predictions = top_model.predict_generator(test_generator,
                                          steps=test_steps,
                                          verbose=1)
prediction_list = np.argmax(predictions, axis=1)
Example #12
                       '_log.csv',
                       separator=',',
                       append=True)
    return [es, msave, reduce_lr, tb_log, log_cv]


folders = [
    '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/2_focus/focus/',
    '/cptjack/totem/yanyiting/gray_focus_unfocus/gray/data/2_focus/unfocus/'
]
img_width, img_height = 224, 224
batch_size_for_generators = 32
train_datagen = DataGenerator(rescale=1. / 255,
                              rotation_range=178,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              fill_mode='nearest',
                              stain_transformation=True)
valid_datagen = DataGenerator(rescale=1. / 255)

all_images = []
all_labels = []

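# Load every PNG from each class folder and label it with the folder index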
for index, folder in enumerate(folders):
    files = glob.glob(folder + "*.png")
    images = io.imread_collection(files)
    images = [image for image in images]  ### Reshape to (299, 299, 3) ###
    labels = [index] * len(images)
    all_images = all_images + images
    all_labels = all_labels + labels
Example #13
    if classes_test.shape != classes_train.shape or not (
            numpy.array(classes_test) == numpy.array(classes_train)).all():
        print("Labels in the train and test datasets are different")
        intersection = classes_test[numpy.in1d(classes_test, classes_train)]
        print(
            f"There are {len(intersection)} common labels; the rest are ignored."
        )
        train_df = train_df[train_df.label.isin(intersection)]
        test_df = test_df[test_df.label.isin(intersection)]

    NUM_CLASSES = len(train_df.label.unique())

    train_base_generator = DataGenerator(
        train_df,
        'image_path',
        'label',
        reduction=0.6,
        image_size=int(args.size),
        aug_sequence=get_augmenting_sequence(),
        batch_size=int(args.batch))

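    # Render augmented batches to disk so the generator output can be inspected visually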
    from PIL import Image
    import random
    Path("train_renders").mkdir(exist_ok=True)
    counter = 0
    for data in train_base_generator:
        for image, label in zip(data[0], data[1]):
            Image.fromarray(image).save(
                "train_renders/" +
                f"{train_base_generator.one_hot_to_label(label)}_" +
                str(random.randint(0, 100)) + ".jpg")
        counter += 1
Example #14
    def __init__(self, generator: DataGenerator = None, num_log_batches=1):
        super().__init__()
        self.generator = generator
        self.num_batches = num_log_batches
        # store full names of classes
        self.flat_class_names = generator.extract_labels()
Example #15
def main(settings):
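    """Train a seq2seq LSTM on the configured math module and save the model, callback histories, and settings."""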
    logger = logging.getLogger(__name__)
    logger.setLevel(logging.INFO)

    # load settings
    settings_path = Path("settings/" + settings)
    with open(str(settings_path), "r") as file:
        settings_dict = json.load(file)

    logger.info(
        "Training seq2seq model on math module << {} >> and difficulty level << {} >>"
        .format(settings_dict["math_module"], settings_dict["train_level"]))

    logger.info("Using TensorFlow version: {}".format(tf.__version__))
    logger.info("GPU Available: {}".format(tf.test.is_gpu_available()))
    cpu_count = multiprocessing.cpu_count()
    logger.info("Number of CPUs: {}".format(cpu_count))

    data_gen_pars, input_texts, target_texts = get_sequence_data(settings_dict)

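    # Build generators for the train, validation, interpolation, and extrapolation splits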
    training_generator = DataGenerator(input_texts=input_texts["train"],
                                       target_texts=target_texts["train"],
                                       **data_gen_pars)
    validation_generator = DataGenerator(input_texts=input_texts["valid"],
                                         target_texts=target_texts["valid"],
                                         **data_gen_pars)
    interpolate_generator = DataGenerator(
        input_texts=input_texts["interpolate"],
        target_texts=target_texts["interpolate"],
        **data_gen_pars)
    extrapolate_generator = DataGenerator(
        input_texts=input_texts["extrapolate"],
        target_texts=target_texts["extrapolate"],
        **data_gen_pars)

    valid_dict = {
        "validation": validation_generator,
        "interpolation": interpolate_generator,
        "extrapolation": extrapolate_generator,
    }

    history = NValidationSetsCallback(valid_dict)
    gradient = GradientLogger(live_metrics=["loss", "exact_match_metric"],
                              live_gaps=10)

    lstm = Seq2SeqLSTM(
        data_gen_pars["num_encoder_tokens"],
        data_gen_pars["num_decoder_tokens"],
        settings_dict["embedding_dim"],
    )

    model = lstm.get_model()
    model.summary()

    adam = Adam(
        lr=6e-4,
        beta_1=0.9,
        beta_2=0.995,
        epsilon=1e-9,
        decay=0.0,
        amsgrad=False,
        clipnorm=0.1,
    )

    model.compile(optimizer=adam,
                  loss="categorical_crossentropy",
                  metrics=[exact_match_metric])

    # directory where the checkpoints will be saved
    checkpoint_dir = settings_dict["save_path"] + "training_checkpoints"
    # name of the checkpoint files
    checkpoint_prefix = os.path.join(checkpoint_dir, "ckpt_{epoch}")

    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=checkpoint_prefix, save_weights_only=True)

    logger.info("Start training ...")
    # workers = cpu_count / 2 and no multiprocessing?
    train_hist = model.fit_generator(
        training_generator,
        epochs=settings_dict["epochs"],
        use_multiprocessing=False,
        workers=cpu_count // 2,
        callbacks=[history, gradient, checkpoint_callback],
        verbose=0,
    )

    # save callbacks data
    with open(settings_dict["save_path"] + "callbacks.pkl", "wb") as file:
        pickle.dump(train_hist.history, file)

    # save model
    model.save(settings_dict["save_path"] + "model.h5")

    # save settings
    with open(settings_dict["save_path"] + "settings.json", "w") as file:
        json.dump(settings_dict, file)
Example #16
    print("Class: %s. Size: %d" % (folder.split("/")[-2], len(images)))

valid_images = np.stack(valid_images)
valid_images = (valid_images / 255).astype(np.float32)  ### Standardise

valid_labels = np.array(valid_labels).astype(np.int32)
Y_valid = to_categorical(valid_labels,
                         num_classes=np.unique(valid_labels).shape[0])

print("\nBootstrapping to Balance - Training set size: %d (%d X %d)" %
      (train_labels.shape[0], MAX, np.unique(train_labels).shape[0]))
print("=" * 40, "\n")
batch_size_for_generators = 64
train_datagen = DataGenerator(rotation_range=180,
                              horizontal_flip=True,
                              vertical_flip=True,
                              shear_range=0.6,
                              stain_transformation=True)

# train_gen = train_datagen.flow(train_images, Y_train, batch_size=batch_size_for_generators)

# VALIDATION

valid_datagen = DataGenerator()

valid_gen = valid_datagen.flow(valid_images,
                               Y_valid,
                               batch_size=batch_size_for_generators)
start = time.time()

Example #17
sys.path.append('../../classification/models/')
sys.path.append('../../shared/')

import utils
from model_trainer import model_trainer
from load_data import data
from generators import DataGenerator

import numpy as np
import metrics
import image

if True:  # the evaluation below needs genTest and gt_label from this block
    # Load data
    print('Loading data...')
    data = data(method='normal')
    cfg = data.cfg

    genTest = DataGenerator(imagesMeta=data.testMeta,
                            GTMeta=data.testGTMeta,
                            cfg=cfg,
                            data_type='test')

    gt_label, _, _ = image.getYData(genTest.dataID, genTest.imagesMeta,
                                    genTest.GTMeta, genTest.cfg)

path = 'C:\\Users\\aag14/Documents/Skole/Speciale/results/HICO/hoi80/yhat1'
yhat = utils.load_obj(path)

evalHOI = metrics.EvalResults(None, genTest, yhat=yhat, y=gt_label[0])
print(evalHOI.mAP, evalHOI.F1)