Example no. 1
    check_randomness("Before fitting")

    logging.info("Fitting the network...")

    history = model.fit(train_x, train_y,
                        validation_data=(val_x, val_y),
                        epochs=EPOCHS,
                        batch_size=BATCH_SIZE,
                        sample_weight=train_y_weights,
                        callbacks=[metrics_callback],
                        shuffle=False)

    if SHOW_PLOTS:
        plots.plot_accuracy(history)
        plots.plot_loss(history)
        plots.plot_prf(metrics_callback)

    if SAVE_MODEL:
        model.save(MODEL_PATH)
        logging.info("Model saved in %s", MODEL_PATH)

else :
    logging.info("Loading existing model from %s...", MODEL_PATH)
    model = load_model(MODEL_PATH)
    logging.info("Completed loading model from file")


logging.info("Predicting on test set...")
output = model.predict(x=test_x, verbose=1)
logging.debug("Shape of output array: %s", np.shape(output))
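
The metrics_callback passed to model.fit above and consumed by plots.plot_prf is defined elsewhere. A minimal sketch of such a Keras callback, assuming a held-out validation set, one-hot labels with softmax outputs, and scikit-learn for the per-epoch precision/recall/F1 computation (the class name and all details here are illustrative assumptions, not the original implementation):

import numpy as np
from sklearn.metrics import precision_recall_fscore_support
from tensorflow.keras.callbacks import Callback

class PRFMetricsCallback(Callback):
    """Hypothetical callback: records macro-averaged precision, recall,
    and F1 on the validation set after every epoch (sketch only)."""

    def __init__(self, val_x, val_y):
        super().__init__()
        self.val_x, self.val_y = val_x, val_y
        self.precisions, self.recalls, self.f1s = [], [], []

    def on_epoch_end(self, epoch, logs=None):
        # Predict class labels on the validation set.
        pred = np.argmax(self.model.predict(self.val_x), axis=-1)
        true = np.argmax(self.val_y, axis=-1)  # assumes one-hot labels
        p, r, f1, _ = precision_recall_fscore_support(
            true, pred, average='macro', zero_division=0)
        self.precisions.append(p)
        self.recalls.append(r)
        self.f1s.append(f1)

A plotting helper like plots.plot_prf would then read the three lists accumulated on the callback instance.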
Example no. 2
def main(
        df_path: str = '/project/cq-training-1/project1/data/catalog.helios.public.20100101-20160101.pkl',
        image_size: int = 32,
        model: str = 'dummy',
        epochs: int = 20,
        optimizer: str = 'adam',
        lr: float = 1e-4,
        batch_size: int = 100,
        subset_perc: float = 1,
        subset_dates: bool = False,
        saved_model_dir: str = None,
        seq_len: int = 6,
        seed: bool = True,
        scale_label: bool = True,
        use_csky: bool = False,
        cache: bool = True,
        timesteps_minutes: int = 15):

    # Warning if no GPU detected
    if len(tf.config.list_physical_devices('GPU')) == 0:
        logger.warning('No GPU detected, training will run on CPU.')
    elif len(tf.config.list_physical_devices('GPU')) > 1:
        logger.warning(
            'Multiple GPUs detected, training will run on only one GPU.')

    if subset_dates and subset_perc != 1:
        raise ValueError(
            f'Invalid configuration: --subset_dates=True cannot be combined '
            f'with --subset_perc={subset_perc}.')

    # Set random seed
    if seed:
        tf.random.set_seed(SEED)
        np.random.seed(SEED)

    # Load dataframe
    logger.info('Loading and preprocessing dataframe...')
    df = pd.read_pickle(df_path)
    df = preprocessing.preprocess(df, shuffle=False, scale_label=scale_label)
    metadata = data.Metadata(df, scale_label)

    # Pre-crop data
    logger.info('Getting crops...')
    images = data.Images(metadata, image_size)
    # images.crop(dest=SLURM_TMPDIR)
    images.crop(dest=images.shared_storage)

    # Split into train and valid
    if subset_dates:
        metadata_train, metadata_valid = metadata.split_with_dates()
    else:
        metadata, _ = metadata.split(1 - subset_perc)
        metadata_train, metadata_valid = metadata.split(VALID_PERC)
    nb_train_examples = metadata_train.get_number_of_examples()
    nb_valid_examples = metadata_valid.get_number_of_examples()
    logger.info(f'Number of training examples: {nb_train_examples}, '
                f'number of validation examples: {nb_valid_examples}')

    # Create model
    if model == 'dummy':
        model = baselines.DummyModel()
    elif model == 'sunset':
        model = baselines.SunsetModel()
    elif model == 'cnndem':
        model = baselines.ConvDemModel(image_size)
    elif model == 'sunset3d':
        model = baselines.Sunset3DModel()
    elif model == 'convlstm':
        model = baselines.ConvLSTM()
    elif model == 'cnngru':
        model = CnnGru(seq_len)
    elif model == 'cnngruatt':
        model = CnnGruAtt(seq_len)
    elif model == 'cnnlstm':
        model = LSTM_Resnet(seq_len)
    elif model == 'resnet':
        model = baselines.ResNetModel()
    else:
        raise ValueError(f'Model "{model}" not recognized.')

    # Load model weights
    if saved_model_dir is not None:
        model.load_weights(os.path.join(saved_model_dir, "model"))

    # Loss and optimizer
    mse = tf.keras.losses.MeanSquaredError()
    if optimizer == 'adam':
        optimizer = tf.keras.optimizers.Adam(lr)
    elif optimizer == 'sgd':
        optimizer = tf.keras.optimizers.SGD(lr)
    else:
        raise ValueError(f'Optimizer "{optimizer}" not recognized.')

    # Create data loader
    dataloader_train = SequenceDataset(
        metadata_train,
        images,
        seq_len,
        batch_size,
        timesteps=datetime.timedelta(minutes=timesteps_minutes),
        cache=cache)
    dataloader_valid = SequenceDataset(
        metadata_valid,
        images,
        seq_len,
        batch_size,
        timesteps=datetime.timedelta(minutes=timesteps_minutes),
        cache=cache)

    # Training loop
    logger.info('Training...')
    losses = {'train': [], 'valid': []}
    best_valid_loss = float('inf')
    for epoch in range(epochs):
        train_epoch(model, dataloader_train, batch_size, mse, optimizer,
                    nb_train_examples, scale_label, use_csky)
        test_epoch(model, dataloader_valid, batch_size, mse, nb_valid_examples,
                   scale_label, use_csky)
        train_loss = np.sqrt(train_mse_metric.result().numpy())
        valid_loss = np.sqrt(valid_mse_metric.result().numpy())
        csky_valid_loss = np.sqrt(valid_csky_mse_metric.result().numpy())

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            utils.save_model(model)

        # Logs
        logger.info(f'Epoch {epoch} - Train Loss: {train_loss:.4f}, '
                    f'Valid Loss: {valid_loss:.4f}, '
                    f'Csky Valid Loss: {csky_valid_loss:.4f}')
        losses['train'].append(train_loss)
        losses['valid'].append(valid_loss)
        with train_summary_writer.as_default():
            tf.summary.scalar('loss', train_loss, step=epoch)
        with test_summary_writer.as_default():
            tf.summary.scalar('loss', valid_loss, step=epoch)

    # Plot losses
    plots.plot_loss(losses['train'], losses['valid'], csky_valid_loss)
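
train_epoch, test_epoch, and the train/valid MSE metric objects they update are defined elsewhere in the project. A minimal sketch of the training side under the interface this loop assumes: a tf.GradientTape step plus a tf.keras.metrics.MeanSquaredError accumulator whose result the loop reads back as an RMSE. The batch format and names are assumptions, not the original code:

import tensorflow as tf

train_mse_metric = tf.keras.metrics.MeanSquaredError()

def train_epoch(model, dataloader, batch_size, loss_fn, optimizer,
                nb_examples, scale_label, use_csky):
    """Sketch of one training epoch: forward pass, gradient step,
    and MSE accumulation (assumed interface, not the original)."""
    train_mse_metric.reset_states()
    for images, labels in dataloader:
        with tf.GradientTape() as tape:
            preds = model(images, training=True)
            loss = loss_fn(labels, preds)
        grads = tape.gradient(loss, model.trainable_variables)
        optimizer.apply_gradients(zip(grads, model.trainable_variables))
        # Accumulate squared error across the epoch; the caller takes
        # the square root of .result() to report RMSE.
        train_mse_metric.update_state(labels, preds)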
Example no. 3
    # Clamp logging spikes in the test losses: any value at or above the
    # threshold is replaced by the previous kept value (assumes the first
    # value is below the threshold, mirroring the bits/dim loop below).
    correct_test_losses = []
    for value in test_losses:
        if value < 60000:
            correct_test_losses.append(value)
        else:
            correct_test_losses.append(correct_test_losses[-1])
    test_losses = np.array(correct_test_losses)

    # Same clamping for bits/dim; 60000 / (3072 * log 2) converts the loss
    # threshold to bits per dimension for 32x32x3 CIFAR images.
    correct_test_bpd = []
    for value in test_bpd:
        if value < 60000 / (3072 * np.log(2)):
            correct_test_bpd.append(value)
        else:
            correct_test_bpd.append(correct_test_bpd[-1])
    test_bpd = np.array(correct_test_bpd)
    print(test_bpd)
    # Plot
    plot_loss(train_losses, 'Train Loss', 'output/cifar/train_loss.png')
    plot_loss(test_losses, 'Test Loss', 'output/cifar/test_loss.png')
    plot_bpd(train_bpd, 'Train bits/dim', 'output/cifar/train_bpd.png')
    plot_bpd(test_bpd, 'Test bits/dim', 'output/cifar/test_bpd.png')

    # 2) Dog CIFAR-10
    model = Trainer(lr=1e-5, epochs=50, device='cpu', subset=True, label='dog')
    print('==> Visualizing training images...')
    model.visualize(fname='output/dogs_cifar_trainset.png')
    model.build()
    # Load pre-trained model
    print('==> Loading pre-trained model')
    model.load_model('input/pre_trained/dogs_cifar/net_final.model')
    print('> Sampling')
    model.save_samples('output/dogs_cifar/pre_trained_dogs_cifar.png')
    print('> Plotting loss and bits/dim')
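
The plot_loss and plot_bpd helpers used above are not shown. A minimal matplotlib sketch matching the (values, title, fname) call signature seen here; this is an assumption about the real helpers, which may differ:

import matplotlib.pyplot as plt

def plot_loss(values, title, fname):
    """Sketch: plot a 1-D array of per-epoch losses and save to disk."""
    plt.figure()
    plt.plot(values)
    plt.xlabel('Epoch')
    plt.ylabel('Loss')
    plt.title(title)
    plt.savefig(fname)
    plt.close()

def plot_bpd(values, title, fname):
    """Sketch: same plot with a bits-per-dimension y-axis label."""
    plt.figure()
    plt.plot(values)
    plt.xlabel('Epoch')
    plt.ylabel('Bits per dimension')
    plt.title(title)
    plt.savefig(fname)
    plt.close()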