def train(args, use_comet: bool = True):
    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    (x_train, y_train), (x_test, y_test) = data.load_data()
    classes = data.mapping

    # # Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # # delete these lines

    y_test_labels = [
        np.where(y_test[idx] == 1)[0][0] for idx in range(len(y_test))
    ]

    # Split the test set 90% test / 10% validation with equal class distribution.
    x_test, x_valid, y_test, y_valid = train_test_split(
        x_test, y_test, test_size=0.1, stratify=y_test_labels, random_state=42)

    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model...')
    model = model_cls(network, data_cls)
    print(model)

    dataset = {
        'x_train': x_train,
        'y_train': y_train,
        'x_valid': x_valid,
        'y_valid': y_valid,
        'x_test': x_test,
        'y_test': y_test
    }

    if use_comet and not args['find_lr']:
        # Create an experiment with your API key.
        experiment = Experiment(api_key='INSERT API KEY',
                                project_name='emnist',
                                auto_param_logging=False)

        print('[INFO] Starting Training...')
        # Will log metrics with the prefix 'train_'.
        with experiment.train():
            _ = train_model(model,
                            dataset,
                            batch_size=args['batch_size'],
                            epochs=args['epochs'],
                            name=args['network'])

        print('[INFO] Starting Testing...')
        # Will log metrics with the prefix 'test_'.
        with experiment.test():
            loss, score = model.evaluate(dataset, args['batch_size'])
            print(f'[INFO] Test evaluation: {score*100}')
            metrics = {'loss': loss, 'accuracy': score}
            experiment.log_metrics(metrics)

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # Creates and logs a hash of your data.
        experiment.end()

    elif use_comet and args['find_lr']:
        _ = train_model(model,
                        dataset,
                        batch_size=args['batch_size'],
                        epochs=args['epochs'],
                        FIND_LR=args['find_lr'],
                        name=args['network'])

    else:
        print('[INFO] Starting Training...')
        train_model(model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network'])

        print('[INFO] Starting Testing...')
        loss, score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}')

    if args['weights']:
        model.save_weights()

    if args['save_model']:
        model.save_model()
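# Hedged usage sketch for the EMNIST variant of train() above. The args keys
# ('dataset', 'model', 'network', 'batch_size', 'epochs', 'find_lr', 'weights',
# 'save_model') are the ones the function reads; the *values* for 'dataset',
# 'model' and 'network' below are hypothetical placeholders and must match
# whatever entries the module-level `funcs` registry actually defines.
def _example_train_emnist():
    example_args = {
        'dataset': 'EMNIST',        # hypothetical funcs key
        'model': 'CharacterModel',  # hypothetical funcs key
        'network': 'mlp',           # hypothetical funcs key
        'batch_size': 32,
        'epochs': 10,
        'find_lr': False,   # True would run the learning-rate finder branch instead
        'weights': True,    # save weights after training
        'save_model': False
    }
    # Pass use_comet=False to skip Comet logging entirely.
    train(example_args, use_comet=False)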
def train(args, use_comet: bool = True):
    data_cls = funcs[args['dataset']]
    model_cls = funcs[args['model']]
    network = funcs[args['network']]

    print('[INFO] Getting dataset...')
    data = data_cls()
    data.load_data()
    (x_train, y_train), (x_test, y_test) = (data.x_train, data.y_train), (data.x_test, data.y_test)
    classes = data.mapping

    # # Used for testing only
    # x_train = x_train[:100, :, :]
    # y_train = y_train[:100, :]
    # x_test = x_test[:100, :, :]
    # y_test = y_test[:100, :]
    # print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    # print('[INFO] Test shape: ', x_test.shape, y_test.shape)
    # # delete these lines

    # Split off 20% of the test set as validation (no stratification).
    x_test, x_valid, y_test, y_valid = train_test_split(
        x_test, y_test, test_size=0.2, random_state=42)

    print('[INFO] Training shape: ', x_train.shape, y_train.shape)
    print('[INFO] Validation shape: ', x_valid.shape, y_valid.shape)
    print('[INFO] Test shape: ', x_test.shape, y_test.shape)

    print('[INFO] Setting up the model...')
    if args['network'] == 'lstmctc':
        network_args = {
            'backbone': args['backbone'],
            'seq_model': args['seq'],
            'bi': args['bi']
        }
        model = model_cls(network, data_cls, network_args)
    else:
        model = model_cls(network, data_cls)
    print(model)

    dataset = {
        'x_train': x_train,
        'y_train': y_train,
        'x_valid': x_valid,
        'y_valid': y_valid,
        'x_test': x_test,
        'y_test': y_test
    }

    if use_comet and not args['find_lr']:
        # Create an experiment with your API key.
        experiment = Experiment(api_key='WVBNRAfMLCBWslJAAsffxM4Gz',
                                project_name='iam_lines',
                                auto_param_logging=False)

        print('[INFO] Starting Training...')
        # Will log metrics with the prefix 'train_'.
        with experiment.train():
            _ = train_model(model,
                            dataset,
                            batch_size=args['batch_size'],
                            epochs=args['epochs'],
                            name=args['network'])

        print('[INFO] Starting Testing...')
        # Will log metrics with the prefix 'test_'.
        with experiment.test():
            score = model.evaluate(dataset, int(args['batch_size']))
            print(f'[INFO] Test evaluation: {score*100}...')
            metrics = {'accuracy': score}
            experiment.log_metrics(metrics)

        experiment.log_parameters(args)
        experiment.log_dataset_hash(x_train)  # Creates and logs a hash of your data.
        experiment.end()

    elif use_comet and args['find_lr']:
        _ = train_model(model,
                        dataset,
                        batch_size=args['batch_size'],
                        epochs=args['epochs'],
                        FIND_LR=args['find_lr'],
                        name=args['network'])

    else:
        print('[INFO] Starting Training...')
        train_model(model,
                    dataset,
                    batch_size=args['batch_size'],
                    epochs=args['epochs'],
                    name=args['network'])

        print('[INFO] Starting Testing...')
        score = model.evaluate(dataset, args['batch_size'])
        print(f'[INFO] Test evaluation: {score*100}...')

    if args['weights']:
        model.save_weights()

    if args['save_model']:
        model.save_model()
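# Hedged usage sketch for the IAM-lines variant of train() above. When
# args['network'] == 'lstmctc' the function additionally reads 'backbone',
# 'seq' and 'bi' to build network_args; the concrete values shown here are
# hypothetical and must match the entries registered in `funcs`.
def _example_train_iam_lines():
    example_args = {
        'dataset': 'IamLinesDataset',  # hypothetical funcs key
        'model': 'LineModelCtc',       # hypothetical funcs key
        'network': 'lstmctc',          # triggers the network_args branch
        'backbone': 'cnn',             # hypothetical backbone name
        'seq': 'lstm',                 # hypothetical sequence model name
        'bi': True,                    # bidirectional sequence model
        'batch_size': 16,
        'epochs': 20,
        'find_lr': False,
        'weights': True,
        'save_model': True
    }
    train(example_args, use_comet=False)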
def run_experiment(experiment_config: dict, save_models: bool, save_experiment: bool = True,
                   use_gcp: bool = True, use_comet: bool = True):
    """
    Run a training experiment.

    Parameters
    ----------
    experiment_config (dict)
        Of the form
        {
            "dataset": "AirlineDataset",
            "dataset_args": {
                "max_overlap": 0.4,
                "subsample_fraction": 0.2
            },
            "gp": "gp_regression",
            "gp_args": {
                "inference_method": "laplace"
            },
            "train_args": {
                "eval_budget": 50,
                "verbose": 1
            }
        }
    save_models (bool)
        If True, will save the final models to a canonical location.
    save_experiment (bool)
        If True, will save the experiment to a canonical location.
    """
    print(f'Running experiment with config {experiment_config}')

    # Get dataset.
    datasets_module = importlib.import_module('src.datasets')
    dataset_class_ = getattr(datasets_module, experiment_config['dataset'])
    dataset_args = experiment_config.get('dataset_args', {})
    dataset = dataset_class_(**dataset_args)
    dataset.load_or_generate_data()
    print(dataset)

    # Get model selector.
    models_module = importlib.import_module('src.autoks.core.model_selection')
    model_class_ = getattr(models_module, experiment_config['model_selector'])

    # Get GP.
    gp_fn_ = experiment_config['gp']
    gp_args = experiment_config.get('gp_args', {})
    model_selector_args = experiment_config.get('model_selector_args', {})
    model_selector = model_class_(gp_fn=gp_fn_, gp_args=gp_args, **model_selector_args)
    print(model_selector)

    experiment_config['train_args'] = {**DEFAULT_TRAIN_ARGS, **experiment_config.get('train_args', {})}
    experiment_config['experiment_group'] = experiment_config.get('experiment_group', None)

    if use_gcp:
        gcp.init()

    experiment = None
    if use_comet:
        if _has_comet_ml:
            comet_config = experiment_config.get('comet_args', {})
            tags = comet_config.pop('tags', [])
            experiment = comet_ml.Experiment(**comet_config)
            experiment.add_tags(tags)
            experiment.log_parameters(experiment_config)
            experiment.log_dataset_hash(dataset.x)
            experiment.log_dataset_info(name=dataset.name)
        else:
            warnings.warn('Please install the `comet_ml` package to use Comet.')

    # Starting time of experiment (used if saving experiment).
    timestamp = str("_".join(str(datetime.today()).split(" "))).replace(":", "-")

    model, history = train_model(
        model_selector,
        dataset,
        eval_budget=experiment_config['train_args']['eval_budget'],
        verbose=experiment_config['train_args']['verbose'],
        use_gcp=use_gcp,
        comet_experiment=experiment
    )

    # Evaluate model selector.
    # TODO: clean this up - don't duplicate evaluation code.
    if experiment:
        with experiment.test():
            x_test, y_test = getattr(dataset, 'x_test', dataset.x), getattr(dataset, 'y_test', dataset.y)
            score = model_selector.evaluate(x_test, y_test)
            print(f'Test evaluation: {score}')
            experiment.log_metric("test_metric", score)
    else:
        x_test, y_test = getattr(dataset, 'x_test', dataset.x), getattr(dataset, 'y_test', dataset.y)
        score = model_selector.evaluate(x_test, y_test)
        print(f'Test evaluation: {score}')

    if use_gcp:
        logging.info({'test_metric': score})

    if save_models:
        model_selector.save_best_model()

    if save_experiment:
        # Create output dictionary.
        output_dict = dict()
        output_dict["history"] = history.to_dict()
        output_dict["dataset_cls"] = experiment_config['dataset']
        output_dict["dataset_args"] = dataset_args
        output_dict['model_selector'] = model_selector.to_dict()

        # Create results directories.
        DIR_NAME.mkdir(parents=True, exist_ok=True)
        exp_group_dir_name = DIR_NAME
        if experiment_config["experiment_group"]:
            exp_group_dir_name /= experiment_config['experiment_group'].replace(" ", "_")
            exp_group_dir_name.mkdir(parents=True, exist_ok=True)
        exp_dir_name = exp_group_dir_name / f'{model_selector.name}_{timestamp}_experiment'

        # Save to compressed output file.
        output_filename = str(exp_dir_name) + ".zip"
        with gzip.GzipFile(output_filename, 'w') as outfile:
            json_str = json.dumps(output_dict)
            json_bytes = json_str.encode('utf-8')
            outfile.write(json_bytes)

        if experiment:
            experiment.log_asset_data(output_dict, file_name=str(exp_dir_name) + ".json")

        if use_gcp:
            # Save output file(s) to bucket.
            # TODO: this should be done in the background uploading everything in gcp.run.dir
            bucket_name = "automated-kernel-search"
            upload_blob(bucket_name, json_bytes, outfile.name)
            logging.info(f"Uploaded blob {outfile.name} to bucket {bucket_name}")

        return output_filename
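# Hedged usage sketch for run_experiment() above, based on the config shape
# shown in its docstring. 'AirlineDataset' and 'gp_regression' come from that
# docstring; the 'model_selector' value and the 'experiment_group' string are
# hypothetical placeholders - 'model_selector' must name a class defined in
# src.autoks.core.model_selection.
def _example_run_experiment():
    example_config = {
        "dataset": "AirlineDataset",
        "dataset_args": {"max_overlap": 0.4, "subsample_fraction": 0.2},
        "model_selector": "EvolutionaryModelSelector",  # hypothetical class name
        "gp": "gp_regression",
        "gp_args": {"inference_method": "laplace"},
        "train_args": {"eval_budget": 50, "verbose": 1},
        "experiment_group": "baseline runs"  # hypothetical group name
    }
    # Disable GCP and Comet for a local run; returns the saved experiment path.
    return run_experiment(example_config, save_models=False,
                          save_experiment=True, use_gcp=False, use_comet=False)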