Example 1
def export_trials(trials: hyperopt.Trials, path: str) -> None:
    def slim(source: hyperopt.Trials) -> hyperopt.Trials:
        """
        Strips trials to the basic values in order to pickle them
        """
        _trials = hyperopt.Trials()
        for trial in source.trials:
            docs = hyperopt.Trials().new_trial_docs(tids=[trial['tid']],
                                                    specs=[trial['spec']],
                                                    results=[trial['result']],
                                                    miscs=[trial['misc']])
            _trials.insert_trial_docs(docs)
        _trials.refresh()
        return _trials

    trials_pickle_path = os.path.join(path, TRIALS_PICKLE_FILE)
    trials_table_path = os.path.join(path, TRIALS_TABLE_FILE)

    xpath.prepare_path(trials_pickle_path)
    xpath.prepare_path(trials_table_path)

    logger.info('Exporting trials (pickled) to %s', trials_pickle_path)
    with tf.io.gfile.GFile(trials_pickle_path, 'wb') as fp:
        st = slim(trials)
        pickle.dump(st, file=fp)

    logger.info('Exporting trials table (csv) to %s', trials_table_path)
    df = convert_trials_to_data_frame(trials)
    with tf.io.gfile.GFile(trials_table_path, 'w') as fp:
        df.to_csv(fp, header=True, index=False)
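For context, a minimal usage sketch: run a toy hyperopt search and export its trials. The toy objective, search space, and output path below are illustrative placeholders, not part of the original module.

import hyperopt
from hyperopt import hp, tpe

trials = hyperopt.Trials()
hyperopt.fmin(lambda p: p['x'] ** 2,                # toy objective, illustrative only
              space={'x': hp.uniform('x', -1.0, 1.0)},
              algo=tpe.suggest,
              max_evals=10,
              trials=trials)
export_trials(trials, path='/tmp/fmnist/tuning')    # output path is a placeholder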
Example 2
def parse_args() -> argparse.Namespace:
    """
    Parse cmd arguments
    :return: :class:`ArgumentParser` instance
    """
    arg_parser = argparse.ArgumentParser(description='FMNIST HyperParameter Search')
    arg_parser.add_argument('--spec', type=str, choices=[Spec.FCNN.name, Spec.CVNN.name, Spec.VGGN.name],
                            help='Model to tune.')
    arg_parser.add_argument('--num-epochs', type=int, default=2, help='Number of training epochs for each experiment run')
    arg_parser.add_argument('--buffer-size', type=int, default=256, help='Capacity for the reading queue')
    arg_parser.add_argument('--num-threads', type=int, default=1, help='Number of threads for processing data')
    arg_parser.add_argument('--no-shuffle', dest='shuffle', action='store_false', help='Disable input shuffling')
    arg_parser.add_argument('--job-dir', required=True, help='Path to job dir')
    arg_parser.add_argument('--model-dir', required=True, help='Path to model dir')
    arg_parser.add_argument('--train-data', required=True, help='Path to the input training data')
    arg_parser.add_argument('--max-evaluations', type=int, required=False, default=2, help='Max number of experiments')
    arg_parser.add_argument('--spark-host', type=str, required=False, default=None,
                            help='Hostname of spark server to use Apache Spark for parallel tuning.')
    arg_parser.set_defaults(shuffle=True)

    args = arg_parser.parse_args()
    logger.info('Running with args:')
    for arg in vars(args):
        logger.info('\t%s: %s', arg, getattr(args, arg))

    return args
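As an aside, the --no-shuffle flag stores False into dest='shuffle', while set_defaults(shuffle=True) keeps shuffling enabled unless the flag is given. A small self-contained illustration (not part of the original module):

import argparse

p = argparse.ArgumentParser()
p.add_argument('--no-shuffle', dest='shuffle', action='store_false')
p.set_defaults(shuffle=True)

assert p.parse_args([]).shuffle is True
assert p.parse_args(['--no-shuffle']).shuffle is False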
Example 3
    def fn(arrays: xtype.DataTuple, partition: int) -> None:
        import tempfile
        import uuid
        temporary_path = os.path.join(tempfile.gettempdir(),
                                      '{}.{}'.format(uuid.uuid4(), extension))

        file_path = os.path.join(path,
                                 'part-{:03}.{}'.format(partition, extension))
        xpath.prepare_path(file_path)
        with open(temporary_path, 'wb') as fp:
            np.savez(fp, *arrays)

        logger.info('Copying %s to %s', temporary_path, file_path)
        tf.io.gfile.copy(src=temporary_path, dst=file_path, overwrite=True)
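This fn appears to be the closure returned by create_export_fn(path, extension) (see how Example 7 calls it); writing to a local temporary file first and then copying with tf.io.gfile.copy lets the destination be any filesystem tf.io.gfile understands (e.g. GCS). A hedged usage sketch with toy arrays; the output directory is a placeholder:

import numpy as np

features = np.zeros((100, 784), dtype=np.float32)   # toy data, illustrative only
labels = np.zeros((100,), dtype=np.int64)

export_fn = create_export_fn('/tmp/fmnist/interim/train', 'npz')
export_fn((features, labels), partition=0)          # writes part-000.npz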
Example 4
def tune(param_space: Dict[str, Any], objective_fn: Callable[[Dict[str, Any]], Dict[str, Any]], max_evaluations: int,
         spark_host: str) -> Tuple[Dict[str, Any], hyperopt.Trials]:
    start = time.time()

    if spark_host:
        import pyspark
        spark_session = pyspark.sql.SparkSession(pyspark.SparkContext(master=spark_host, appName=APP_NAME))
        trials = hyperopt.SparkTrials(spark_session=spark_session)
    else:
        trials = hyperopt.Trials()

    best_params = hyperopt.fmin(objective_fn,
                                param_space,
                                algo=tpe.suggest,
                                max_evals=max_evaluations,
                                trials=trials,
                                rstate=np.random.RandomState(1777))
    evaluated_best_params = hyperopt.space_eval(param_space, best_params)
    losses = [x['result']['loss'] for x in trials.trials]

    logger.info('Best score: %f', -min(losses))
    logger.info('Best parameters: %s', evaluated_best_params)
    logger.info('Time elapsed: %s', time.strftime("%H:%M:%S", time.gmtime(time.time() - start)))
    logger.info('Parameter combinations evaluated: %d', max_evaluations)

    return evaluated_best_params, trials
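A hedged sketch of calling tune; the search space and objective below are illustrative placeholders, not the project's actual configuration.

import math
from hyperopt import hp

param_space = {
    'batch_size': hp.choice('batch_size', [64, 128, 256]),
    'learning_rate': hp.loguniform('learning_rate', math.log(1e-4), math.log(1e-1)),
}
# my_objective_fn is hypothetical; it must return a dict with 'loss' and
# 'status' keys (see Examples 5 and 8)
best_params, trials = tune(param_space, objective_fn=my_objective_fn,
                           max_evaluations=16, spark_host=None)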
Example 5
        def wrapper_fn(guess_params: Dict[str, Any]):
            params = {k: v for k, v in guess_params.items() if not isinstance(v, dict)}
            params['num_blocks'] = guess_params['conv']['num_blocks']
            params['block_size'] = guess_params['conv']['block_size']
            signature = core.create_signature(
                params={**params, 'class': self.__class__.__name__}
            )
            task_job_dir = os.path.join(job_dir, signature)
            task_model_dir = os.path.join(model_dir, signature)

            logger.info('Running with config: %s', params)

            def train_fn(batch_size: int, learning_rate: float, fcl_dropout_rate: float, activation: str,
                         num_blocks: int, block_size: int,
                         fcl_num_layers: int, fcl_layer_size: int,
                         optimizer: str) -> Dict[str, Any]:
                from fmnist.learning.arch.vggn import train
                hps_loss, status = math.nan, hyperopt.STATUS_FAIL

                try:
                    metrics, export_path = train.train(base_data_dir, num_threads=num_threads,
                                                       buffer_size=buffer_size,
                                                       batch_size=batch_size, num_epochs=num_epochs,
                                                       shuffle=shuffle,
                                                       job_dir=task_job_dir, model_dir=task_model_dir,
                                                       learning_rate=learning_rate,
                                                       num_blocks=num_blocks, block_size=block_size,
                                                       fcl_dropout_rate=fcl_dropout_rate, activation=activation,
                                                       fcl_num_layers=fcl_num_layers,
                                                       fcl_layer_size=fcl_layer_size, optimizer_name=optimizer)
                    # fail the trial if training produced NaNs
                    if math.isnan(metrics['sparse_categorical_accuracy']) or math.isnan(metrics['loss']):
                        status = hyperopt.STATUS_FAIL
                    else:
                        status = hyperopt.STATUS_OK
                    # hyperopt minimizes, so use the negated (squared) accuracy as the loss
                    hps_loss = -math.pow(metrics['sparse_categorical_accuracy'], 2.0)
                except Exception as err:
                    logger.error(err)

                # returning here (not from a finally block) avoids swallowing
                # SystemExit / KeyboardInterrupt
                return {'loss': hps_loss, 'status': status,
                        'job_dir': task_job_dir, 'model_dir': task_model_dir,
                        'params': {**params, 'num_epochs': num_epochs, 'tuner': self.__class__.__name__}}

            return train_fn(batch_size=params['batch_size'], learning_rate=params['learning_rate'],
                            fcl_dropout_rate=params['fcl_dropout_rate'],
                            activation=params['activation'],
                            num_blocks=params['num_blocks'], block_size=params['block_size'],
                            fcl_num_layers=params['fcl_num_layers'],
                            fcl_layer_size=params['fcl_layer_size'], optimizer=params['optimizer'])
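Since wrapper_fn flattens a nested 'conv' entry out of the sampled parameters, the search space it expects is presumably shaped like the hedged sketch below; all concrete values are illustrative.

import math
from hyperopt import hp

param_space = {
    'batch_size': hp.choice('batch_size', [128, 256]),
    'learning_rate': hp.loguniform('learning_rate', math.log(1e-4), math.log(1e-2)),
    'fcl_dropout_rate': hp.uniform('fcl_dropout_rate', 0.0, 0.5),
    'activation': hp.choice('activation', ['relu', 'elu']),
    'fcl_num_layers': hp.choice('fcl_num_layers', [1, 2]),
    'fcl_layer_size': hp.choice('fcl_layer_size', [128, 256]),
    'optimizer': hp.choice('optimizer', ['adam', 'sgd']),
    # the nested dict is what surfaces as guess_params['conv'] above
    'conv': hp.choice('conv', [{'num_blocks': 2, 'block_size': 2},
                               {'num_blocks': 3, 'block_size': 2}]),
}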
Example 6
def parse_args():
    arg_parser = argparse.ArgumentParser(
        'fmnist-vgg19-embedding',
        description='Get VGG19 embeddings for FMNIST')
    arg_parser.add_argument('--train-data', required=True)
    arg_parser.add_argument('--batch-size',
                            required=False,
                            type=int,
                            default=32)
    arg_parser.add_argument('--job-dir', required=False, default=None)

    args = arg_parser.parse_args()

    logger.info('Running with arguments:')
    for attr, value in vars(args).items():
        logger.info('%s: %s', attr, value)

    return args
Example 7
def main():
    args = parse_args()
    fpath = create_path_fn(args.train_data)

    df_prime = pd.read_csv(fpath(DataPaths.FMNIST, 'fashion-mnist_train.csv'))
    df_test = pd.read_csv(fpath(DataPaths.FMNIST, 'fashion-mnist_test.csv'))
    df_train, df_val = data_frame_split(df_prime, left_fraction=0.80)

    for df, split in zip((df_train, df_val, df_test),
                         ('train', 'val', 'test')):
        logger.info('Running partitioning pipeline for %s', split)
        ds = create_dataset(df)
        data_iter = create_generator(ds, batch_size=args.batch_size)

        export_fn = create_export_fn(fpath(DataPaths.INTERIM, split), 'npz')
        partition_export_fn = create_partitioning_fn(group_size=100,
                                                     agg_fn=agg_fn,
                                                     consumer_fn=export_fn)
        partition_export_fn(data_iter)
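create_partitioning_fn is not shown in these examples; below is a minimal sketch of a function with this shape, assuming agg_fn merges a list of batches and consumer_fn matches Example 3's fn. Illustrative only; the real implementation may differ.

import itertools
from typing import Any, Callable, Iterator, List


def create_partitioning_fn(group_size: int,
                           agg_fn: Callable[[List[Any]], Any],
                           consumer_fn: Callable[[Any, int], None]) -> Callable[[Iterator[Any]], None]:
    def partition_fn(data_iter: Iterator[Any]) -> None:
        partition = 0
        while True:
            # take the next group_size items off the stream
            chunk = list(itertools.islice(data_iter, group_size))
            if not chunk:
                break
            # aggregate the chunk and hand it to the consumer with its index
            consumer_fn(agg_fn(chunk), partition)
            partition += 1
    return partition_fn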
Example 8
        def wrapper_fn(params: Dict[str, Any]):
            signature = core.create_signature(
                params={**params, 'tuner': self.__class__.__name__}
            )
            task_job_dir = os.path.join(job_dir, signature)
            task_model_dir = os.path.join(model_dir, signature)

            logger.info('Running with config: %s', params)

            def train_fn(batch_size: int, learning_rate: float, dropout_rate: float, activation: str, num_layers: int,
                         layer_size: int, optimizer: str) -> Dict[str, Any]:
                from fmnist.learning.arch.fcnn import train
                hps_loss, status = math.nan, hyperopt.STATUS_FAIL

                try:
                    metrics, export_path = train.train(base_data_dir, num_threads=num_threads,
                                                       buffer_size=buffer_size,
                                                       batch_size=batch_size, num_epochs=num_epochs,
                                                       shuffle=shuffle,
                                                       job_dir=task_job_dir, model_dir=task_model_dir,
                                                       learning_rate=learning_rate,
                                                       dropout_rate=dropout_rate, activation=activation,
                                                       num_layers=num_layers,
                                                       layer_size=layer_size, optimizer_name=optimizer)
                    # fail the trial if training produced NaNs
                    if math.isnan(metrics['sparse_categorical_accuracy']) or math.isnan(metrics['loss']):
                        status = hyperopt.STATUS_FAIL
                    else:
                        status = hyperopt.STATUS_OK
                    # hyperopt minimizes, so use the negated (squared) accuracy as the loss
                    hps_loss = -math.pow(metrics['sparse_categorical_accuracy'], 2.0)
                except Exception as err:
                    # log the failure; the trial is still reported to hyperopt with STATUS_FAIL
                    logger.error(err)

                return {'loss': hps_loss, 'status': status,
                        'job_dir': task_job_dir, 'model_dir': task_model_dir,
                        'params': {**params, 'num_epochs': num_epochs, 'tuner': self.__class__.__name__}}

            return train_fn(batch_size=params['batch_size'], learning_rate=params['learning_rate'],
                            dropout_rate=params['dropout_rate'],
                            activation=params['activation'], num_layers=params['num_layers'],
                            layer_size=params['layer_size'], optimizer=params['optimizer'])
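Once hyperopt.fmin has consumed results of this shape, the payload recorded above (including job_dir and model_dir) can be read back from the best trial; a small illustrative snippet:

best = trials.best_trial['result']   # Trials.best_trial is part of hyperopt's public API
logger.info('Best loss: %f, model dir: %s', best['loss'], best['model_dir'])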
Example 9
def export_parameters(params: Dict[str, Any], path: str) -> None:
    evaluated_params_path = os.path.join(path, EVALUATED_PARAMS_FILE)
    logger.info('Exporting best params to %s', evaluated_params_path)
    xpath.prepare_path(evaluated_params_path)
    with tf.io.gfile.GFile(evaluated_params_path, 'w') as fp:
        json.dump(params, fp=fp, sort_keys=True)
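A hedged sketch of reading the exported parameters back; the concrete file name below is a placeholder for the EVALUATED_PARAMS_FILE constant:

import json
import tensorflow as tf

with tf.io.gfile.GFile('/tmp/fmnist/evaluated_params.json', 'r') as fp:   # path illustrative
    params = json.load(fp)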