Example #1
def params_v8(verbose=True, path_to_checkpoints=""):
    """
    Returns params dict for v8 architecture
    :param verbose: Outputs information on model configuration
    :param path_to_checkpoints: Path to parent of checkpoints directory (include '/'!)
    :return: Params dict for DeepSphere model
    """
    params = dict()
    params['dir_name'] = path_to_checkpoints + "flask101-101-v8"

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = 'mean'  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [32, 32, 64, 64, 64, 64, 32, 32]  # Graph convolutional layers: number of feature maps.
    params['K'] = [5, 5, 5, 5, 5, 5, 5, 5]  # Polynomial orders.
    params['batch_norm'] = [False] * 8  # Batch normalization.
    params['M'] = [2]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = 1  # One channel (spherical map) per sample.

    # Pooling.
    nsides = [
        NSIDE, NSIDE // 2, NSIDE // 4, NSIDE // 8, NSIDE // 16, NSIDE // 32,
        NSIDE // 64, NSIDE // 128, NSIDE // 256
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, ORDER)
    #     params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = 1  # Fraction of neurons to keep (1 = no dropout).

    # Training.
    params['num_epochs'] = 20  # Number of passes through the training data.
    params['batch_size'] = 32  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: 1e-4
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = 'custom2'  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = 60

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // ORDER)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.

        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // ORDER)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 422 * (NSIDE // ORDER)**2))

    return params
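A minimal usage sketch for params_v8 (illustrative only): it assumes the surrounding module defines NSIDE, ORDER, utils and tf, exactly as the function body requires, and the checkpoint path below is hypothetical.

params = params_v8(verbose=False, path_to_checkpoints="checkpoints/")
assert len(params['F']) == len(params['K']) == len(params['batch_norm'])  # one entry per GC layer
print(params['dir_name'])  # checkpoints/flask101-101-v8
print(params['nsides'])    # NSIDE halved at every pooling step, down to NSIDE // 256
# model = models.deepsphere(**params)  # e.g. once the DeepSphere models module is imported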
Example #2
def params_vdata1(exp_name,
                  input_channels,
                  nmaps,
                  nfilters,
                  verbose=True,
                  num_epochs=20,
                  learning_rate=1e-4,
                  decay_factor=0.999,
                  order=ORDER,
                  batch_size=16):
    """
    Returns params dict for vdata1 type architectures
    :param batch_size: Batch size for training the network
    :param nfilters: Constant number of filters for each graph convolutional layer
    :param exp_name: Experiment ID used to define and track directories
    :param order: HEALPIX order for partial-sky maps
    :param decay_factor: Factor by which the learning rate is multiplied every training step
    :param nmaps: Number of full-sky maps from which the training data is generated
    :param input_channels: Number of input channels (spherical maps) per sample
    :param learning_rate: Constant learning rate to use during training
    :param num_epochs: Number of epochs for training the model
    :param verbose: Outputs information on model configuration
    :return: Params dict for DeepSphere model
    """
    params = dict()
    params['dir_name'] = "flaskv2-vdata1-{}".format(exp_name)

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = None  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [nfilters] * 8  # Graph convolutional layers: number of feature maps.
    params['K'] = [5, 5, 5, 5, 5, 5, 5, 5]  # Polynomial orders.
    params['batch_norm'] = [True] * 7 + [False]  # Batch normalization.
    params['M'] = [1]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = input_channels  # Number of channels (spherical maps) per sample.

    # Pooling.
    nsides = [
        NSIDE, NSIDE // 2, NSIDE // 4, NSIDE // 8, NSIDE // 16, NSIDE // 32,
        NSIDE // 64, NSIDE // 128, NSIDE // 256
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    #     params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = 0.8  # Fraction of neurons to keep.

    # Training.
    params['num_epochs'] = num_epochs  # Number of passes through the training data.
    params['batch_size'] = batch_size  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        learning_rate, step, decay_steps=1, decay_rate=decay_factor)
    # params['scheduler'] = lambda step: learning_rate
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = 'l1'  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = int(12 * order * order * nmaps /
                                   batch_size / 3)  # Three evaluations per epoch.

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.

        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 12 * order * order * nmaps *
            (NSIDE // order)**2))

    return params
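As a rough, self-contained check of the schedule above (illustrative values, not taken from the source): with decay_steps=1 and no staircase, tf.train.exponential_decay reduces the rate to learning_rate * decay_factor**step, so the learning rate after one epoch can be estimated with plain Python.

learning_rate, decay_factor = 1e-4, 0.999
order, nmaps, batch_size = 2, 20, 16  # hypothetical training setup
steps_per_epoch = 12 * order * order * nmaps // batch_size  # samples per epoch / batch size
lr_after_one_epoch = learning_rate * decay_factor ** steps_per_epoch
print(steps_per_epoch, lr_after_one_epoch)  # 60 steps, ~9.4e-05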
Example #3
def params_v3(exp_name,
              convtype='chebyshev5',
              pooltype='max',
              nmaps=16,
              activation_func='relu',
              stat_layer=None,
              input_channels=1,
              gc_depth=8,
              nfilters=64,
              const_k=5,
              var_k=None,
              filters=None,
              batch_norm_output=False,
              var_batch_norm=None,
              fc_layers=[],
              num_outputs=1,
              reg_factor=0,
              dropout_rate=0.8,
              verbose=True,
              num_epochs=1,
              learning_rate=1e-4,
              decay_factor=0.999,
              decay_freq=1,
              decay_staircase=False,
              loss_func="l1",
              nside=NSIDE,
              nsides=None,
              order=ORDER,
              batch_size=16):
    """
    Returns params dict for v3 architectures

    :param convtype: Type of graph convolution performed ("chebyshev5" or "monomials").
    :param num_outputs: 1 for just sigma_8, 2 for sigma_8 and predicted log-variance q
    :param nsides: List of NSIDES for graph convolutional layers. Length = gc_depth
    :param nside: NSIDE of input maps. Should be 1024.
    :param loss_func: Choice of loss function ("l1", "custom1", "custom2", "l2"). Must be implemented in DeepSphere codebase.
    :param decay_staircase: If true, performs integer division in lr decay, decaying every decay_freq steps.
    :param decay_freq: Number of steps between decays when decay_staircase=True. Otherwise spreads the decay over decay_freq steps, weakening the per-step decay.
    :param dropout_rate: Fraction of neurons kept (between 0 and 1).
    :param reg_factor: Multiplier for L2 Norm of weights.
    :param fc_layers: List of sizes of hidden fully connected layers (excluding the output layer).
    :param var_batch_norm: List of True/False values turning batch normalization on/off for each GC layer.
    :param batch_norm_output: Batch normalization value for the output layer (True/False). Unused if var_batch_norm is not None.
    :param var_k: List of GC orders K for each layer. Length = gc_depth.
    :param const_k: Constant K value for each GC layer. Unused if var_k is not None.
    :param stat_layer: Type of statistical layer applied for invariance. Can be None, mean, meanvar, var, or hist.
    :param pooltype: Type of pooling used for GC layers (max or avg).
    :param activation_func: Type of activation function applied for all GC and FC layers (relu, leaky_relu, elu, etc.).
    :param gc_depth: Number of GC layers in the network. Fixed at eight if NSIDE=1024 and pooling by two every layer.
    :param filters: List of # of filters for each GC layer. Length = gc_depth.
    :param batch_size: Batch size for training the network. Ideally a power of two.
    :param nfilters: Constant # of filters for each GC layer. Unused if filters is not None.
    :param exp_name: Experiment ID to define and track directories.
    :param order: HEALPIX order for partial-sky maps. Fixed at 2.
    :param decay_factor: Decay factor by which learning rate gets multiplied every decay_freq steps depending on decay_staircase.
    :param nmaps: Number of full-sky maps from which the training data is being generated.
    :param input_channels: Number of input partial-sky maps. 1 for convergence, 2 for shear, +1 for counts-in-cells.
    :param learning_rate: Initial learning rate to use during training
    :param num_epochs: Number of epochs for training the model
    :param verbose: Outputs information on model config

    :return: Params dict for DeepSphere model trained on FLASK v2 data without galaxy-matter bias. Doesn't allow for sophisticated regularization or loss functions.
    """
    params = dict()
    params['dir_name'] = "flaskv3-{}".format(exp_name)

    # Types of layers.
    params['conv'] = convtype  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = pooltype  # Pooling: max or average.
    params['activation'] = activation_func  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = stat_layer  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.

    if filters is None:
        filters = [nfilters] * gc_depth

    if var_k is None:
        var_k = [const_k] * gc_depth

    if var_batch_norm is None:
        var_batch_norm = [True] * (gc_depth - 1) + [batch_norm_output]

    if nsides is None:
        nsides = [nside // (2**i) for i in range(gc_depth + 1)]

    params['F'] = filters  # Graph convolutional layers: number of feature maps.
    params['K'] = var_k  # Polynomial orders.
    params['batch_norm'] = var_batch_norm  # Batch normalization.
    params['M'] = fc_layers + [num_outputs]  # Fully connected layers: output dimensionalities.
    params['input_channel'] = input_channels  # Number of channels (spherical maps) per sample.

    # Pooling.
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    #     params['batch_norm_full'] = []

    # Regularization (to prevent over-fitting).
    params['regularization'] = reg_factor  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    params['dropout'] = dropout_rate  # Fraction of neurons to keep.

    # Training.
    params['num_epochs'] = num_epochs  # Number of passes through the training data.
    params['batch_size'] = batch_size  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        learning_rate,
        step,
        decay_steps=decay_freq,
        decay_rate=decay_factor,
        staircase=decay_staircase)
    # params['scheduler'] = lambda step: learning_rate
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)
    params['loss'] = loss_func  # Regression loss.

    # Number of model evaluations during training (influences training time).
    params['eval_frequency'] = int(12 * order * order * nmaps /
                                   batch_size / 3)  # Three evaluations per epoch.

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.

        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (NSIDE // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * 12 * order * order * nmaps *
            (NSIDE // order)**2))

    return params
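A hedged usage sketch for params_v3 with a few defaults overridden (values are illustrative; NSIDE, ORDER, utils and tf are assumed to be defined at module level, as the function itself requires).

params = params_v3(exp_name="demo",  # hypothetical experiment ID
                   filters=[32, 32, 64, 64, 64, 64, 32, 32],  # per-layer filter counts
                   fc_layers=[64],              # one hidden FC layer before the output
                   num_outputs=2,               # sigma_8 plus predicted log-variance q
                   decay_freq=100,
                   decay_staircase=True,        # decay once every 100 steps
                   verbose=False)
assert len(params['F']) == len(params['K']) == len(params['batch_norm']) == 8
assert params['M'] == [64, 2]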
def get_params(ntrain,
               EXP_NAME,
               order,
               Nside,
               architecture="FCN",
               verbose=True):
    """Parameters for the cgcnn and cnn2d defined in deepsphere/models.py"""

    n_classes = 2

    params = dict()
    params['dir_name'] = EXP_NAME

    # Types of layers.
    params['conv'] = 'chebyshev5'  # Graph convolution: chebyshev5 or monomials.
    params['pool'] = 'max'  # Pooling: max or average.
    params['activation'] = 'relu'  # Non-linearity: relu, elu, leaky_relu, softmax, tanh, etc.
    params['statistics'] = 'mean'  # Statistics (for invariance): None, mean, var, meanvar, hist.

    # Architecture.
    params['F'] = [16, 32, 64, 64, 64, n_classes]  # Graph convolutional layers: number of feature maps.
    params['K'] = [5] * 6  # Polynomial orders.
    params['batch_norm'] = [True] * 6  # Batch normalization.
    params['M'] = []  # Fully connected layers: output dimensionalities.

    # Pooling.
    nsides = [
        Nside, Nside // 2, Nside // 4, Nside // 8, Nside // 16, Nside // 32,
        Nside // 32
    ]
    params['nsides'] = nsides
    params['indexes'] = utils.nside2indexes(nsides, order)
    #     params['batch_norm_full'] = []

    if architecture == "CNN":
        # Classical convolutional neural network.
        # Replace the last graph convolution and global average pooling by a fully connected layer.
        # That is, change the classifier while keeping the feature extractor.
        params['F'] = params['F'][:-1]
        params['K'] = params['K'][:-1]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['nsides'] = params['nsides'][:-1]
        params['indexes'] = params['indexes'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]

    elif architecture == "FCN":
        # Fully convolutional neural network.
        pass

    elif architecture == 'FNN':
        # Fully connected neural network.
        raise NotImplementedError('This is not working!')
        params['F'] = []
        params['K'] = []
        params['batch_norm'] = []
        params['indexes'] = []
        params['statistics'] = None
        params['M'] = [
            128 * order * order, 1024 * order, 1024 * order, n_classes
        ]
        params['batch_norm_full'] = [True] * 3
        params['input_shape'] = (Nside // order)**2

    elif architecture == 'CNN-2d-big':
        params['F'] = params['F'][:-1]
        params['K'] = [[5, 5]] * 5
        params['p'] = [2, 2, 2, 2, 2]
        params['input_shape'] = [1024 // order, 1024 // order]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'FCN-2d-big':
        params['K'] = [[5, 5]] * 6
        params['p'] = [2, 2, 2, 2, 2, 1]
        params['input_shape'] = [1024 // order, 1024 // order]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'CNN-2d':
        params['F'] = [8, 16, 32, 32, 16]
        params['K'] = [[5, 5]] * 5
        params['p'] = [2, 2, 2, 2, 2]
        params['input_shape'] = [1024 // order, 1024 // order]
        params['batch_norm'] = params['batch_norm'][:-1]
        params['statistics'] = None
        params['M'] = [n_classes]
        del params['indexes']
        del params['nsides']
        del params['conv']

    elif architecture == 'FCN-2d':
        params['F'] = [8, 16, 32, 32, 16, 2]
        params['K'] = [[5, 5]] * 6
        params['p'] = [2, 2, 2, 2, 2, 1]
        params['input_shape'] = [1024 // order, 1024 // order]
        del params['indexes']
        del params['nsides']
        del params['conv']

    else:
        raise ValueError('Unknown architecture {}.'.format(architecture))

    # Regularization (to prevent over-fitting).
    params['regularization'] = 0  # Amount of L2 regularization over the weights
    # (will be divided by the number of weights).
    if '2d' in architecture:
        params['regularization'] = 3
    # elif architecture == 'FNN':
    #     print('Use regularization new')
    #     params['regularization'] = 10
    #     params['dropout'] = 1
    params['dropout'] = 1  # Fraction of neurons to keep (1 = no dropout).

    # Training.
    params['num_epochs'] = 80  # Number of passes through the training data.
    params['batch_size'] = 16 * order**2  # Constant quantity of information (#pixels) per step (invariant to sample size).

    # Optimization: learning rate schedule and optimizer.
    params['scheduler'] = lambda step: tf.train.exponential_decay(
        2e-4, step, decay_steps=1, decay_rate=0.999)
    params['optimizer'] = lambda lr: tf.train.AdamOptimizer(
        lr, beta1=0.9, beta2=0.999, epsilon=1e-8)

    # Number of model evaluations during training (influences training time).
    n_evaluations = 200
    params['eval_frequency'] = int(params['num_epochs'] * ntrain /
                                   params['batch_size'] / n_evaluations)

    if verbose:
        print('#sides: {}'.format(nsides))
        print('#pixels: {}'.format([(nside // order)**2 for nside in nsides]))
        # Number of pixels on the full sphere: 12 * nsides**2.

        print('#samples per batch: {}'.format(params['batch_size']))
        print('=> #pixels per batch (input): {:,}'.format(
            params['batch_size'] * (Nside // order)**2))
        print('=> #pixels for training (input): {:,}'.format(
            params['num_epochs'] * ntrain * (Nside // order)**2))

        n_steps = params['num_epochs'] * ntrain // params['batch_size']
        lr = [
            params['scheduler'](step).eval(session=tf.Session())
            for step in [0, n_steps]
        ]
        print(
            'Learning rate will start at {:.1e} and finish at {:.1e}.'.format(
                *lr))

    return params
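A brief usage sketch for get_params (hypothetical ntrain and experiment names; order=2 and Nside=1024 match the values assumed elsewhere in this file): the FCN variant stays fully convolutional, while the CNN variant swaps the last graph convolution and global pooling for a fully connected classifier.

fcn = get_params(3000, "demo-fcn", order=2, Nside=1024, architecture="FCN", verbose=False)
cnn = get_params(3000, "demo-cnn", order=2, Nside=1024, architecture="CNN", verbose=False)
print(len(fcn['F']), fcn['M'])  # 6 graph convolutional layers, M = [] (fully convolutional)
print(len(cnn['F']), cnn['M'])  # 5 graph convolutional layers, M = [n_classes]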