Ejemplo n.º 1
0
class EnvironmentParamsContainer(task.Task):
    """Container task for the environment-level (global) parameters.

    It leans on luigi's own parameter mechanism: every setting below is
    declared as a global parameter, so instantiating this class yields an
    object with all of the environment values set on it. This is arguably
    a bit of a hack.
    """

    # TODO(erikbern): would be cleaner if we don't have to read config in global scope
    local_scheduler = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Use local scheduling')
    scheduler_host = parameter.Parameter(
        is_global=True,
        # Looked up while the class body executes (see the TODO above).
        default=get_config().get(
            'core', 'default-scheduler-host', default='localhost'),
        description='Hostname of machine running remote scheduler')
    scheduler_port = parameter.IntParameter(
        is_global=True,
        default=8082,
        description='Port of remote scheduler api process')
    lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Do not run if the task is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')

    @classmethod
    def env_params(cls, override_defaults):
        """Apply ``override_defaults`` and return a fresh instance.

        Each global parameter whose name appears in ``override_defaults``
        gets the corresponding value installed through ``set_default``;
        parameters that are not mentioned are left untouched.

        :param override_defaults: mapping of parameter name to the value
            that should replace its default.
        :return: a new instance of this class.
        """
        for name, global_param in cls.get_global_params():
            if name not in override_defaults:
                continue
            global_param.set_default(override_defaults[name])

        # Instantiating picks up the (possibly overridden) global defaults.
        return cls()
Ejemplo n.º 2
0
class EnvironmentParamsContainer(task.Task):
    """Keeps track of a bunch of environment params.

    Uses the internal luigi parameter mechanism.
    The nice thing is that we can instantiate this class
    and get an object with all the environment variables set.
    This is arguably a bit of a hack.
    """

    local_scheduler = parameter.BooleanParameter(
        is_global=True, default=False, description='Use local scheduling')
    scheduler_host = parameter.Parameter(
        is_global=True,
        default='localhost',
        description='Hostname of machine running remote scheduler',
        config_path=dict(section='core', name='default-scheduler-host'))
    scheduler_port = parameter.IntParameter(
        is_global=True,
        default=8082,
        description='Port of remote scheduler api process',
        config_path=dict(section='core', name='default-scheduler-port'))
    # The two literals are concatenated by the parser, so the separating
    # space has to be part of one of them.
    lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='(Deprecated, replaced by no_lock) '
        'Do not run if similar process is already running')
    lock_size = parameter.IntParameter(
        is_global=True,
        default=1,
        description="Maximum number of workers running the same command")
    no_lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Ignore if similar process is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')
    logging_conf_file = parameter.Parameter(
        is_global=True,
        default=None,
        description='Configuration file for logging',
        config_path=dict(section='core', name='logging_conf_file'))
    module = parameter.Parameter(
        is_global=True,
        default=None,
        description='Used for dynamic loading of modules'
    )  # see DynamicArgParseInterface

    @classmethod
    def env_params(cls, override_defaults=None):
        """Apply overrides to the global params and return an instance.

        :param override_defaults: optional mapping of parameter name to
            value. Every global parameter whose name is a key of the
            mapping receives that value through ``set_global``. ``None``
            (the default) means "no overrides".
        :return: a new instance of this class.
        """
        # A ``None`` sentinel avoids sharing one mutable dict object
        # between all calls that omit the argument.
        if override_defaults is None:
            override_defaults = {}

        # Override any global parameter with whatever is in override_defaults
        for param_name, param_obj in cls.get_global_params():
            if param_name in override_defaults:
                param_obj.set_global(override_defaults[param_name])

        return cls()  # instantiate an object with the global params set on it
Ejemplo n.º 3
0
class EnvironmentParamsContainer(task.Task):
    """Keeps track of a bunch of environment params.

    Uses the internal luigi parameter mechanism.
    The nice thing is that we can instantiate this class
    and get an object with all the environment variables set.
    This is arguably a bit of a hack.
    """

    local_scheduler = parameter.BooleanParameter(
        is_global=True, default=False, description='Use local scheduling')
    # Host, port and logging file start out as None; their real defaults
    # are installed by apply_config_defaults().
    scheduler_host = parameter.Parameter(
        is_global=True,
        default=None,
        description='Hostname of machine running remote scheduler')
    scheduler_port = parameter.IntParameter(
        is_global=True,
        default=None,
        description='Port of remote scheduler api process')
    # The two literals are concatenated by the parser, so the separating
    # space has to be part of one of them.
    lock = parameter.BooleanParameter(
        is_global=True,
        default=True,
        description='(Deprecated, replaced by no_lock) '
        'Do not run if similar process is already running')
    no_lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Ignore if similar process is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')
    logging_conf_file = parameter.Parameter(
        is_global=True,
        default=None,
        description='Configuration file for logging')

    @classmethod
    def apply_config_defaults(cls):
        """Set config-backed defaults for host, port and logging file.

        Each value is read from the ``core`` section of the configuration,
        with a hard-coded fallback passed as the third argument to ``get``.
        """
        cls.scheduler_host.set_default(configuration.get_config().get(
            'core', 'default-scheduler-host', 'localhost'))
        # NOTE(review): the port is read with the generic ``get``; if that
        # getter returns raw strings, a configured port would be a str
        # while the fallback is an int -- confirm against the config API.
        cls.scheduler_port.set_default(configuration.get_config().get(
            'core', 'default-scheduler-port', 8082))
        cls.logging_conf_file.set_default(configuration.get_config().get(
            'core', 'logging_conf_file', None))

    @classmethod
    def env_params(cls, override_defaults):
        """Apply config defaults, then overrides, and return an instance.

        :param override_defaults: mapping of parameter name to value. Every
            global parameter whose name is a key of the mapping receives
            that value through ``set_default``, after the config-backed
            defaults have been applied.
        :return: a new instance of this class.
        """
        cls.apply_config_defaults()
        # Override any global parameter with whatever is in override_defaults
        for param_name, param_obj in cls.get_global_params():
            if param_name in override_defaults:
                param_obj.set_default(override_defaults[param_name])

        return cls()  # instantiate an object with the global params set on it
Ejemplo n.º 4
0
class EnvironmentParamsContainer(task.ConfigWithoutSection):
    """Holder for the environment-level parameters.

    Built on luigi's internal parameter mechanism: creating an instance
    gives an object with all the environment variables set on it.
    This is arguably a bit of a hack.
    """

    local_scheduler = parameter.BoolParameter(
        default=False,
        description='Use local scheduling')
    scheduler_host = parameter.Parameter(
        default='localhost',
        description='Hostname of machine running remote scheduler',
        config_path={'section': 'core', 'name': 'default-scheduler-host'})
    scheduler_port = parameter.IntParameter(
        default=8082,
        description='Port of remote scheduler api process',
        config_path={'section': 'core', 'name': 'default-scheduler-port'})
    lock_size = parameter.IntParameter(
        default=1,
        description="Maximum number of workers running the same command")
    no_lock = parameter.BoolParameter(
        default=False,
        description='Ignore if similar process is already running')
    lock_pid_dir = parameter.Parameter(
        # A 'luigi' directory under the platform's temporary directory.
        default=os.path.join(tempfile.gettempdir(), 'luigi'),
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        default=1,
        description='Maximum number of parallel tasks to run')
    logging_conf_file = parameter.Parameter(
        default=None,
        description='Configuration file for logging',
        config_path={'section': 'core', 'name': 'logging_conf_file'})
    # see DynamicArgParseInterface
    module = parameter.Parameter(
        default=None,
        description='Used for dynamic loading of modules')
    parallel_scheduling = parameter.BoolParameter(
        default=False,
        description='Use multiprocessing to do scheduling in parallel.',
        config_path=dict(section='core', name='parallel-scheduling'))
Ejemplo n.º 5
0
class EnvironmentParamsContainer(task.Task):
    """Container task for the environment-level (global) parameters.

    It leans on luigi's own parameter mechanism: every setting below is
    declared as a global parameter, so instantiating this class yields an
    object with all of the environment values set on it. This is arguably
    a bit of a hack.
    """

    # TODO(erikbern): would be cleaner if we don't have to read config in global scope
    local_scheduler = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Use local scheduling')
    scheduler_host = parameter.Parameter(
        is_global=True,
        # Looked up while the class body executes (see the TODO above).
        default=get_config().get(
            'core', 'default-scheduler-host', default='localhost'),
        description='Hostname of machine running remote scheduler')
    lock = parameter.BooleanParameter(
        is_global=True,
        default=False,
        description='Do not run if the task is already running')
    lock_pid_dir = parameter.Parameter(
        is_global=True,
        default='/var/tmp/luigi',
        description='Directory to store the pid file')
    workers = parameter.IntParameter(
        is_global=True,
        default=1,
        description='Maximum number of parallel tasks to run')
Ejemplo n.º 6
0
        dnn_model(x, y)

        x_score = x_test[genotype['feature']]
        y_score = y_test
        score = dnn_model.eval(x_score, y_score)

        scores.append(score)
    print('scores: {}, mean: {}, parameters: {}'.format(scores, np.mean(scores), genotype))

    info_dict = {'mean': np.mean(scores), 'parameters': genotype}
    g_info_list.append(info_dict)
    return np.mean(scores)


# Option table keyed by setting name; each value is a descriptor object from
# the ``parameter`` module.
# NOTE(review): the tuples look like (low, high) bounds and the lists like
# candidate values -- confirm against the ``parameter`` module.
g_parameter_options = {
    # Network shape.
    'layers': parameter.IntParameter((1, 5)),
    'neurons': parameter.IntParameter((1, 5)),

    # Settings with a single candidate value each.
    'activation': parameter.SingleChoiceParameter(['relu']),
    'loss_metric': parameter.SingleChoiceParameter(['binary_crossentropy']),
    'optimizer': parameter.SingleChoiceParameter(['adam']),
    'batch_norm': parameter.SingleChoiceParameter([True]),

    'dropout': parameter.FloatParameter((0.0, 0.2)),
    'last_layer_act': parameter.SingleChoiceParameter(['softmax']),
    'kernel_initializer': parameter.SingleChoiceParameter(['he_normal']),

    # Candidates are the names of every column of ``learning_data`` except
    # the last one; ``size`` is the number of such columns.
    'feature': parameter.MultipleChoiceParameter(
        size=len(learning_data.iloc[:, :-1].columns.values.tolist()),
        fixed_size=False,
        value=learning_data.iloc[:, :-1].columns.values.tolist()),

    'statistical_op': parameter.SingleChoiceParameter(
        value=['standardize', 'normalize', 'do_nothing'])
}