Code Example #1
def train(args, use_comet: bool = True):

    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    (x_train, y_train), (x_test, y_test) = data.load_data()
    classes = data.mapping

    # #Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print ('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # #delete these lines

    y_test_labels = [
        np.where(y_test[idx] == 1)[0][0] for idx in range(len(y_test))
    ]
    # Split the test set 90/10 into test and validation, stratified to preserve the class distribution
    (x_test, x_valid, y_test,
     y_valid) = train_test_split(x_test,
                                 y_test,
                                 test_size=0.1,
                                 stratify=y_test_labels,
                                 random_state=42)

    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model..')
    model = model_cls(network, data_cls)
    print(model)

    dataset = dict({
        'x_train': x_train,
        'y_train': y_train,
        'x_valid': x_valid,
        'y_valid': y_valid,
        'x_test': x_test,
        'y_test': y_test
    })

    if use_comet and not args['find_lr']:
        # Create an experiment with your API key
        experiment = Experiment(api_key='INSERT API KEY',
                                project_name='emnist',
                                auto_param_logging=False)

        print('[INFO] Starting Training...')
        #will log metrics with the prefix 'train_'
        with experiment.train():
            _ = train_model(model,
                            dataset,
                            batch_size=args['batch_size'],
                            epochs=args['epochs'],
                            name=args['network'])

        print('[INFO] Starting Testing...')
        #will log metrics with the prefix 'test_'
        with experiment.test():
            loss, score = model.evaluate(dataset, args['batch_size'])
            print(f'[INFO] Test evaluation: {score*100}')
            metrics = {'loss': loss, 'accuracy': score}
            experiment.log_metrics(metrics)

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # creates and logs a hash of your data
        experiment.end()

    elif use_comet and args['find_lr']:

        _ = train_model(model,
                        dataset,
                        batch_size=args['batch_size'],
                        epochs=args['epochs'],
                        FIND_LR=args['find_lr'],
                        name=args['network'])

    else:

        print('[INFO] Starting Training...')
        train_model(model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network'])
        print('[INFO] Starting Testing...')
        loss, score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}')

    if args['weights']:
        model.save_weights()

    if args['save_model']:
        model.save_model()
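
The stratified 90/10 split above is plain scikit-learn. Below is a standalone sketch with synthetic data (the array shapes and class count are placeholders, not the project's EMNIST arrays); `argmax` recovers the same integer labels as the `np.where` lookup in the snippet:

import numpy as np
from sklearn.model_selection import train_test_split

rng = np.random.default_rng(42)
x_test = rng.normal(size=(1000, 28, 28))        # fake images
y_test = np.eye(10)[rng.integers(0, 10, 1000)]  # fake one-hot labels

# Integer class labels for stratification (equivalent to the np.where lookup above).
y_test_labels = y_test.argmax(axis=1)

x_test, x_valid, y_test, y_valid = train_test_split(
    x_test, y_test, test_size=0.1, stratify=y_test_labels, random_state=42)

print(x_test.shape, x_valid.shape)  # (900, 28, 28) (100, 28, 28)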
Code Example #2
def train(args, use_comet: bool = True):

    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print ('[INFO] Getting dataset...')
    data = data_cls()
    data.load_data()
    (x_train, y_train), (x_test, y_test) = (data.x_train, data.y_train), (data.x_test, data.y_test)
    classes = data.mapping
    
    # #Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print ('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # #delete these lines

    # Split the test set 80/20 into test and validation (note: no stratification here)
    (x_test, x_valid, y_test, y_valid) = train_test_split(
        x_test, y_test, test_size=0.2, random_state=42)

    print ('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print ('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print ('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print ('[INFO] Setting up the model..')
    if args['network'] == 'lstmctc':
        network_args = {'backbone' : args['backbone'],
                        'seq_model' : args['seq'],
                        'bi' : args['bi']
                        }
        model = model_cls(network, data_cls, network_args)
    else:
        model = model_cls(network, data_cls)
    print (model)
    
    dataset = dict({
        'x_train' : x_train,
        'y_train' : y_train,
        'x_valid' : x_valid,
        'y_valid' : y_valid,
        'x_test' : x_test,
        'y_test' : y_test
    })

    if use_comet and not args['find_lr']:
        # Create an experiment with your API key
        experiment = Experiment(api_key='INSERT API KEY',
                                project_name='iam_lines',
                                auto_param_logging=False)
        
        print ('[INFO] Starting Training...')
        #will log metrics with the prefix 'train_'   
        with experiment.train():
            _ = train_model(
                    model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network']
                    )

        print ('[INFO] Starting Testing...')    
        #will log metrics with the prefix 'test_'
        with experiment.test():  
            score = model.evaluate(dataset, int(args['batch_size']))
            print(f'[INFO] Test evaluation: {score*100}...')
            metrics = {
                'accuracy':score
            }
            experiment.log_metrics(metrics)    

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train) #creates and logs a hash of your data 
        experiment.end()

    elif use_comet and args['find_lr']:

        _ = train_model(
                    model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    FIND_LR=args['find_lr'],
                    name=args['network']
                    )

    else:

        print ('[INFO] Starting Training...')
        train_model(
            model,
            dataset,
            batch_size=args['batch_size'],
            epochs=args['epochs'],
            name=args['network']
            )
        print ('[INFO] Starting Testing...')    
        score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}...')

    if args['weights']:
        model.save_weights()
    
    if args['save_model']:
        model.save_model()
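
Both examples follow the same Comet logging pattern: metrics logged inside the `experiment.train()` and `experiment.test()` context managers get `train_`/`test_` prefixes. A minimal, self-contained sketch (the key, project name, and metric values are placeholders):

from comet_ml import Experiment

experiment = Experiment(api_key='INSERT API KEY',
                        project_name='my-project',   # placeholder project
                        auto_param_logging=False)

with experiment.train():    # metrics logged here get a 'train_' prefix
    experiment.log_metrics({'loss': 0.31, 'accuracy': 0.91})

with experiment.test():     # metrics logged here get a 'test_' prefix
    experiment.log_metrics({'accuracy': 0.89})

experiment.log_parameters({'batch_size': 32, 'epochs': 10})
experiment.end()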
Code Example #3
def run_experiment(experiment_config: dict,
                   save_models: bool,
                   save_experiment: bool = True,
                   use_gcp: bool = True,
                   use_comet: bool = True):
    """
    Run a training experiment.
    Parameters
    ----------
    experiment_config (dict)
        Of the form
        {
            "dataset": "AirlineDataset",
            "dataset_args": {
                "max_overlap": 0.4,
                "subsample_fraction": 0.2
            },
            "gp": "gp_regression",
            "gp_args": {
                "inference_method": "laplace"
            },
            "train_args": {
                "eval_budget": 50,
                "verbose": 1
            }
        }
    save_models (bool)
        If True, will save the final models to a canonical location
    save_experiment (bool)
        If True, will save the experiment to a canonical location
    use_gcp (bool)
        If True, will initialize GCP logging and upload the saved experiment to a bucket
    use_comet (bool)
        If True, will log the run to Comet.ml (requires the comet_ml package)
    """
    print(f'Running experiment with config {experiment_config}')

    # Get dataset.
    datasets_module = importlib.import_module('src.datasets')
    dataset_class_ = getattr(datasets_module, experiment_config['dataset'])
    dataset_args = experiment_config.get('dataset_args', {})
    dataset = dataset_class_(**dataset_args)
    dataset.load_or_generate_data()
    print(dataset)

    # Get model selector.
    models_module = importlib.import_module('src.autoks.core.model_selection')
    model_class_ = getattr(models_module, experiment_config['model_selector'])

    # Get GP.
    gp_fn_ = experiment_config['gp']
    gp_args = experiment_config.get('gp_args', {})
    model_selector_args = experiment_config.get('model_selector_args', {})
    model_selector = model_class_(
        gp_fn=gp_fn_,
        gp_args=gp_args,
        **model_selector_args
    )
    print(model_selector)

    experiment_config['train_args'] = {**DEFAULT_TRAIN_ARGS, **experiment_config.get('train_args', {})}
    experiment_config['experiment_group'] = experiment_config.get('experiment_group', None)

    if use_gcp:
        gcp.init()

    experiment = None
    if use_comet:
        if _has_comet_ml:
            comet_config = experiment_config.get('comet_args', {})
            tags = comet_config.pop('tags', [])
            experiment = comet_ml.Experiment(**comet_config)
            experiment.add_tags(tags)
            experiment.log_parameters(experiment_config)
            experiment.log_dataset_hash(dataset.x)
            experiment.log_dataset_info(name=dataset.name)
        else:
            warnings.warn('Please install the `comet_ml` package to use Comet.')

    # Starting time of experiment (used if saving experiment)
    timestamp = str(datetime.today()).replace(' ', '_').replace(':', '-')

    model, history = train_model(
        model_selector,
        dataset,
        eval_budget=experiment_config['train_args']['eval_budget'],
        verbose=experiment_config['train_args']['verbose'],
        use_gcp=use_gcp,
        comet_experiment=experiment
    )

    # Evaluate model selector.
    # TODO: clean this up - don't duplicate evaluation code.
    if experiment:
        with experiment.test():
            x_test, y_test = getattr(dataset, 'x_test', dataset.x), getattr(dataset, 'y_test', dataset.y)
            score = model_selector.evaluate(x_test, y_test)
            print(f'Test evaluation: {score}')
            experiment.log_metric("test_metric", score)
    else:
        x_test, y_test = getattr(dataset, 'x_test', dataset.x), getattr(dataset, 'y_test', dataset.y)
        score = model_selector.evaluate(x_test, y_test)
        print(f'Test evaluation: {score}')

    if use_gcp:
        logging.info({'test_metric': score})

    if save_models:
        model_selector.save_best_model()

    if save_experiment:
        # Create output dictionary.
        output_dict = dict()
        output_dict["history"] = history.to_dict()
        output_dict["dataset_cls"] = experiment_config['dataset']
        output_dict["dataset_args"] = dataset_args
        output_dict['model_selector'] = model_selector.to_dict()

        # Create results directories.
        DIR_NAME.mkdir(parents=True, exist_ok=True)
        exp_group_dir_name = DIR_NAME
        if experiment_config["experiment_group"]:
            exp_group_dir_name /= experiment_config['experiment_group'].replace(" ", "_")
        exp_group_dir_name.mkdir(parents=True, exist_ok=True)
        exp_dir_name = exp_group_dir_name / f'{model_selector.name}_{timestamp}_experiment'

        # Save to compressed output file.
        output_filename = str(exp_dir_name) + ".zip"
        with gzip.GzipFile(output_filename, 'w') as outfile:
            json_str = json.dumps(output_dict)
            json_bytes = json_str.encode('utf-8')
            outfile.write(json_bytes)

        if experiment:
            experiment.log_asset_data(output_dict, file_name=str(exp_dir_name) + ".json")

        if use_gcp:
            # Save output file(s) to bucket
            # TODO: this should be done in the background uploading everything in gcp.run.dir
            bucket_name = "automated-kernel-search"
            upload_blob(bucket_name, json_bytes, outfile.name)
            logging.info(f"Uploaded blob {outfile.name} to bucket {bucket_name}")

        return output_filename
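
A hypothetical invocation, using the config keys `run_experiment` actually reads. The 'dataset', 'gp', and 'train_args' values come from the docstring above; the 'model_selector' class name is a placeholder, standing in for whatever class `src.autoks.core.model_selection` exports:

experiment_config = {
    'dataset': 'AirlineDataset',
    'dataset_args': {'subsample_fraction': 0.2},
    'model_selector': 'SomeModelSelector',   # placeholder class name
    'model_selector_args': {},
    'gp': 'gp_regression',
    'gp_args': {'inference_method': 'laplace'},
    'train_args': {'eval_budget': 50, 'verbose': 1},
    'experiment_group': 'airline runs',
}

output_file = run_experiment(experiment_config,
                             save_models=False,
                             save_experiment=True,
                             use_gcp=False,    # skip the bucket upload for a local run
                             use_comet=False)  # skip Comet logging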