def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0, l09=0,
      l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0, l18=0, l19=0):
  """Optimizing function.

  Takes in hyperparameter values and returns the valid set performance.

  Parameters
  ----------
  l00~l19: int or float
    placeholders for the hyperparameters being optimized; the
    hyper_parameters dict is rebuilt from these placeholder values

  Returns
  -------
  valid_scores: float
    valid set performance
  """
  args = locals()
  # Input hyper parameters
  i = 0
  for hp in hp_list_single:
    hyper_parameters[hp] = float(args[param_name[i]])
    if param_range[i][0] == 'int':
      hyper_parameters[hp] = int(hyper_parameters[hp])
    i = i + 1
  for hp in hp_list_multiple:
    hyper_parameters[hp[0]] = [
        float(args[param_name[j]]) for j in range(i, i + hp[1])
    ]
    if param_range[i][0] == 'int':
      # list() keeps the values reusable after iteration (Python 3)
      hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
    i = i + hp[1]
  print(hyper_parameters)
  # Run benchmark
  if isinstance(self.model_class, str):
    try:
      train_scores, valid_scores, _ = benchmark_classification(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=hyper_parameters)
    except AssertionError:
      train_scores, valid_scores, _ = benchmark_regression(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=hyper_parameters)
    return valid_scores[self.model_class][metric[0].name]
  else:
    model_dir = tempfile.mkdtemp()
    model = self.model_class(hyper_parameters, model_dir)
    model.fit(train_dataset, **hyper_parameters)
    model.save()
    evaluator = Evaluator(model, valid_dataset, output_transformers)
    # metric is a list of dc.metrics.Metric, consistent with the branch above
    multitask_scores = evaluator.compute_model_performance(metric)
    return multitask_scores[metric[0].name]
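# Illustrative sketch (values made up, not part of the original code) of the
# surrounding state that f expects: each placeholder l00..l19 maps
# positionally onto one entry of the flattened hyperparameter set, with an
# ('int' / 'cont', [low, high]) range per entry; search_range is assumed to
# be 4 here.
hp_list_single = ['batch_size', 'learning_rate']   # scalar hyperparameters
hp_list_multiple = [('layer_sizes', 2)]            # list-valued, length 2
param_name = ['l' + format(i, '02d') for i in range(20)]  # 'l00', 'l01', ...
param_range = [
    ('int', [64 // 4, 64 * 4]),      # l00 -> batch_size = 64
    ('cont', [1e-3 / 4, 1e-3 * 4]),  # l01 -> learning_rate = 1e-3
    ('int', [1000 // 4, 1000 * 4]),  # l02 -> layer_sizes[0] = 1000
    ('int', [1000 // 4, 1000 * 4]),  # l03 -> layer_sizes[1] = 1000
]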
def run_benchmark(datasets, model, split=None, metric=None, direction=True, featurizer=None, n_features=0, out_path='.', hyper_parameters=None, hyper_param_search=False, max_iter=20, search_range=2, test=False, reload=True, seed=123): """ Run benchmark test on designated datasets with deepchem(or user-defined) model Parameters ---------- datasets: list of string choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl, clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast, uv, factors, kinase model: string or user-defined model stucture choice of which model to use, deepchem provides implementation of logistic regression, random forest, multitask network, bypass multitask network, irv, graph convolution; for user define model, it should include function: fit, evaluate split: string, optional (default=None) choice of splitter function, None = using the default splitter metric: string, optional (default=None) choice of evaluation metrics, None = using the default metrics(AUC & R2) direction: bool, optional(default=True) Optimization direction when doing hyperparameter search Maximization(True) or minimization(False) featurizer: string or dc.feat.Featurizer, optional (default=None) choice of featurization, None = using the default corresponding to model (string only applicable to deepchem models) n_features: int, optional(default=0) depending on featurizers, redefined when using deepchem featurizers, need to be specified for user-defined featurizers(if using deepchem models) out_path: string, optional(default='.') path of result file hyper_parameters: dict, optional (default=None) hyper parameters for designated model, None = use preset values hyper_param_search: bool, optional(default=False) whether to perform hyper parameter search, using gaussian process by default max_iter: int, optional(default=20) number of optimization trials search_range: int(float), optional(default=4) optimization on [initial values / search_range, initial values * search_range] test: boolean, optional(default=False) whether to evaluate on test set reload: boolean, optional(default=True) whether to save and reload featurized datasets """ for dataset in datasets: if dataset in [ 'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'pcba_146', 'pcba_2475', 'sider', 'tox21', 'toxcast' ]: mode = 'classification' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean), ] elif dataset in [ 'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl' ]: mode = 'regression' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score, np.mean) ] else: raise ValueError('Dataset not supported') if featurizer == None and isinstance(model, str): # Assigning featurizer if not user defined pair = (dataset, model) if pair in CheckFeaturizer: featurizer = CheckFeaturizer[pair][0] n_features = CheckFeaturizer[pair][1] else: continue if not split in [None] + CheckSplit[dataset]: continue loading_functions = { 'bace_c': deepchem.molnet.load_bace_classification, 'bace_r': deepchem.molnet.load_bace_regression, 'bbbp': deepchem.molnet.load_bbbp, 'chembl': deepchem.molnet.load_chembl, 'clearance': deepchem.molnet.load_clearance, 'clintox': deepchem.molnet.load_clintox, 'delaney': deepchem.molnet.load_delaney, 'factors': deepchem.molnet.load_factors, 'hiv': deepchem.molnet.load_hiv, 'hopv': 
deepchem.molnet.load_hopv, 'kaggle': deepchem.molnet.load_kaggle, 'kinase': deepchem.molnet.load_kinase, 'lipo': deepchem.molnet.load_lipo, 'muv': deepchem.molnet.load_muv, 'nci': deepchem.molnet.load_nci, 'pcba': deepchem.molnet.load_pcba, 'pcba_146': deepchem.molnet.load_pcba_146, 'pcba_2475': deepchem.molnet.load_pcba_2475, 'pdbbind': deepchem.molnet.load_pdbbind_grid, 'ppb': deepchem.molnet.load_ppb, 'qm7': deepchem.molnet.load_qm7_from_mat, 'qm7b': deepchem.molnet.load_qm7b_from_mat, 'qm8': deepchem.molnet.load_qm8, 'qm9': deepchem.molnet.load_qm9, 'sampl': deepchem.molnet.load_sampl, 'sider': deepchem.molnet.load_sider, 'tox21': deepchem.molnet.load_tox21, 'toxcast': deepchem.molnet.load_toxcast, 'uv': deepchem.molnet.load_uv, } print('-------------------------------------') print('Benchmark on dataset: %s' % dataset) print('-------------------------------------') # loading datasets if split is not None: print('Splitting function: %s' % split) tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, split=split, reload=reload) else: tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, reload=reload) train_dataset, valid_dataset, test_dataset = all_dataset time_start_fitting = time.time() train_score = {} valid_score = {} test_score = {} if hyper_param_search: if hyper_parameters is None: hyper_parameters = hps[model] search_mode = deepchem.hyper.GaussianProcessHyperparamOpt(model) hyper_param_opt, _ = search_mode.hyperparam_search( hyper_parameters, train_dataset, valid_dataset, transformers, metric, direction=direction, n_features=n_features, n_tasks=len(tasks), max_iter=max_iter, search_range=search_range) hyper_parameters = hyper_param_opt if isinstance(model, str): if mode == 'classification': train_score, valid_score, test_score = benchmark_classification( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) elif mode == 'regression': train_score, valid_score, test_score = benchmark_regression( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) else: model.fit(train_dataset) train_score['user_defined'] = model.evaluate(train_dataset, metric, transformers) valid_score['user_defined'] = model.evaluate(valid_dataset, metric, transformers) if test: test_score['user_defined'] = model.evaluate(test_dataset, metric, transformers) time_finish_fitting = time.time() with open(os.path.join(out_path, 'results.csv'), 'a') as f: writer = csv.writer(f) model_name = list(train_score.keys())[0] for i in train_score[model_name]: output_line = [ dataset, str(split), mode, model_name, i, 'train', train_score[model_name][i], 'valid', valid_score[model_name][i] ] if test: output_line.extend(['test', test_score[model_name][i]]) output_line.extend( ['time_for_running', time_finish_fitting - time_start_fitting]) writer.writerow(output_line) if hyper_param_search: with open(os.path.join(out_path, dataset + model + '.pkl'), 'w') as f: pickle.dump(hyper_parameters, f)
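# Illustrative usage sketch for the run_benchmark variant above (assumes
# deepchem and the MolNet loaders are installed); the dataset, model and
# split strings follow the choices listed in its docstring.
if __name__ == '__main__':
  run_benchmark(
      ['tox21'],
      'tf',                     # multitask network
      split='random',
      out_path='.',
      hyper_param_search=True,  # Gaussian process search from preset values
      max_iter=20,
      search_range=2,
      test=True)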
def run_benchmark(datasets, model, split=None, metric=None, featurizer=None, n_features=0, out_path='.', hyper_parameters=None, test=False, reload=True, seed=123): """ Run benchmark test on designated datasets with deepchem(or user-defined) model Parameters ---------- datasets: list of string choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl, clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast model: string or user-defined model stucture choice of which model to use, deepchem provides implementation of logistic regression, random forest, multitask network, bypass multitask network, irv, graph convolution; for user define model, it should include function: fit, evaluate split: string, optional (default=None) choice of splitter function, None = using the default splitter metric: string, optional (default=None) choice of evaluation metrics, None = using the default metrics(AUC & R2) featurizer: string or dc.feat.Featurizer, optional (default=None) choice of featurization, None = using the default corresponding to model (string only applicable to deepchem models) n_features: int, optional(default=0) depending on featurizers, redefined when using deepchem featurizers, need to be specified for user-defined featurizers(if using deepchem models) out_path: string, optional(default='.') path of result file hyper_parameters: dict, optional (default=None) hyper parameters for designated model, None = use preset values test: boolean, optional(default=False) whether to evaluate on test set reload: boolean, optional(default=True) whether to save and reload featurized datasets """ for dataset in datasets: if dataset in [ 'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'sider', 'tox21', 'toxcast' ]: mode = 'classification' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean), ] elif dataset in [ 'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl' ]: mode = 'regression' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score, np.mean) ] else: raise ValueError('Dataset not supported') if featurizer == None and isinstance(model, str): # Assigning featurizer if not user defined pair = (dataset, model) if pair in CheckFeaturizer: featurizer = CheckFeaturizer[pair][0] n_features = CheckFeaturizer[pair][1] else: continue if not split in [None] + CheckSplit[dataset]: continue loading_functions = { 'bace_c': deepchem.molnet.load_bace_classification, 'bace_r': deepchem.molnet.load_bace_regression, 'bbbp': deepchem.molnet.load_bbbp, 'chembl': deepchem.molnet.load_chembl, 'clearance': deepchem.molnet.load_clearance, 'clintox': deepchem.molnet.load_clintox, 'delaney': deepchem.molnet.load_delaney, 'hiv': deepchem.molnet.load_hiv, 'hopv': deepchem.molnet.load_hopv, 'kaggle': deepchem.molnet.load_kaggle, 'lipo': deepchem.molnet.load_lipo, 'muv': deepchem.molnet.load_muv, 'nci': deepchem.molnet.load_nci, 'pcba': deepchem.molnet.load_pcba, 'pdbbind': deepchem.molnet.load_pdbbind_grid, 'ppb': deepchem.molnet.load_ppb, 'qm7': deepchem.molnet.load_qm7_from_mat, 'qm7b': deepchem.molnet.load_qm7b_from_mat, 'qm8': deepchem.molnet.load_qm8, 'qm9': deepchem.molnet.load_qm9, 'sampl': deepchem.molnet.load_sampl, 'sider': deepchem.molnet.load_sider, 'tox21': deepchem.molnet.load_tox21, 'toxcast': deepchem.molnet.load_toxcast } print('-------------------------------------') 
print('Benchmark on dataset: %s' % dataset) print('-------------------------------------') # loading datasets if split is not None: print('Splitting function: %s' % split) tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, split=split, reload=reload) else: tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, reload=reload) train_dataset, valid_dataset, test_dataset = all_dataset time_start_fitting = time.time() train_score = {} valid_score = {} test_score = {} if isinstance(model, str): if mode == 'classification': train_score, valid_score, test_score = benchmark_classification( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) elif mode == 'regression': train_score, valid_score, test_score = benchmark_regression( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) else: model.fit(train_dataset) train_score['user_defined'] = model.evaluate( train_dataset, metric, transformers) valid_score['user_defined'] = model.evaluate( valid_dataset, metric, transformers) if test: test_score['user_defined'] = model.evaluate( test_dataset, metric, transformers) time_finish_fitting = time.time() with open(os.path.join(out_path, 'results.csv'), 'a') as f: writer = csv.writer(f) model_name = list(train_score.keys())[0] for i in train_score[model_name]: output_line = [ dataset, str(split), mode, model_name, i, 'train', train_score[model_name][i], 'valid', valid_score[model_name][i] ] if test: output_line.extend(['test', test_score[model_name][i]]) output_line.extend([ 'time_for_running', time_finish_fitting - time_start_fitting ]) writer.writerow(output_line)
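# The run_benchmark docstrings above only require a user-defined model to
# provide fit(train_dataset) and evaluate(dataset, metrics, transformers).
# Below is a minimal illustrative sketch of such a model (a scikit-learn
# wrapper); the class name, the single-task assumption and the fixed
# ROC-AUC metric are assumptions for illustration, not part of the
# original code.
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import roc_auc_score


class UserDefinedModel(object):
  """Sketch of the interface run_benchmark expects from custom models."""

  def __init__(self):
    self.sk_model = LogisticRegression()

  def fit(self, dataset):
    # dataset is a dc.data.Dataset; train on its features and first task
    self.sk_model.fit(dataset.X, dataset.y[:, 0])

  def evaluate(self, dataset, metrics, transformers):
    # Return a {metric_name: score} dict, which is what the CSV writer
    # above iterates over; transformers are ignored in this sketch.
    y_pred = self.sk_model.predict_proba(dataset.X)[:, 1]
    return {'roc_auc_score': roc_auc_score(dataset.y[:, 0], y_pred)}


# Usage would then look like (featurizer and n_features must be given
# explicitly for user-defined models, per the docstring above):
#   run_benchmark(['tox21'], UserDefinedModel(), featurizer='ECFP',
#                 n_features=1024)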
def hyperparam_search(
    self,
    params_dict,
    train_dataset,
    valid_dataset,
    output_transformers,
    metric,
    direction=True,
    n_features=1024,
    n_tasks=1,
    max_iter=20,
    search_range=4,
    hp_invalid_list=[
        'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts',
        'n_pair_feat', 'fit_transformers', 'min_child_weight',
        'max_delta_step', 'subsample', 'colsample_bylevel', 'colsample_bytree',
        'reg_alpha', 'reg_lambda', 'scale_pos_weight', 'base_score'
    ],
    log_file='GPhypersearch.log'):
  """Perform hyperparameter search using a Gaussian process assumption.

  params_dict includes the single-valued parameters being optimized, which
  should only contain int, float and lists of int (float); parameters with
  names in hp_invalid_list will not be changed.

  For MolNet models, self.model_class is the model name as a string,
  params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

  Parameters
  ----------
  params_dict: dict
    dict of parameters and their initial values; parameters not suitable
    for optimization can be added to hp_invalid_list
  train_dataset: dc.data.Dataset struct
    dataset used for training
  valid_dataset: dc.data.Dataset struct
    dataset used for validation (optimization on valid scores)
  output_transformers: list of dc.trans.Transformer
    transformers for evaluation
  metric: list of dc.metrics.Metric
    metric used for evaluation
  direction: bool
    maximization (True) or minimization (False)
  n_features: int
    number of input features
  n_tasks: int
    number of tasks
  max_iter: int
    number of optimization trials
  search_range: int or float
    optimization on [initial values / search_range,
                     initial values * search_range]
  hp_invalid_list: list
    names of parameters that should not be optimized
  log_file: string
    name of log file; hyperparameters and results for each trial are recorded

  Returns
  -------
  hyper_parameters: dict
    params_dict with all optimized values
  valid_performance_opt: float
    best performance on valid dataset
  """
  assert len(metric) == 1, 'Only use one metric'
  hyper_parameters = params_dict
  hp_list = list(hyper_parameters.keys())
  for hp in hp_invalid_list:
    if hp in hp_list:
      hp_list.remove(hp)

  hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
  assert set(hp_list_class) <= set([list, int, float])
  # Float or int hyper parameters (e.g. batch_size, learning_rate)
  hp_list_single = [
      hp_list[i] for i in range(len(hp_list)) if not hp_list_class[i] is list
  ]
  # List of float or int hyper parameters (e.g. layer_sizes)
  hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                      for i in range(len(hp_list))
                      if hp_list_class[i] is list]

  # Number of parameters
  n_param = len(hp_list_single)
  if len(hp_list_multiple) > 0:
    n_param = n_param + sum([hp[1] for hp in hp_list_multiple])

  # Range of optimization
  param_range = []
  for hp in hp_list_single:
    if hyper_parameters[hp].__class__ is int:
      param_range.append((('int'), [
          hyper_parameters[hp] // search_range,
          hyper_parameters[hp] * search_range
      ]))
    else:
      param_range.append((('cont'), [
          hyper_parameters[hp] / search_range,
          hyper_parameters[hp] * search_range
      ]))
  for hp in hp_list_multiple:
    if hyper_parameters[hp[0]][0].__class__ is int:
      param_range.extend([(('int'), [
          hyper_parameters[hp[0]][i] // search_range,
          hyper_parameters[hp[0]][i] * search_range
      ]) for i in range(hp[1])])
    else:
      param_range.extend([(('cont'), [
          hyper_parameters[hp[0]][i] / search_range,
          hyper_parameters[hp[0]][i] * search_range
      ]) for i in range(hp[1])])

  # Dummy names
  param_name = ['l' + format(i, '02d') for i in range(20)]
  param = dict(zip(param_name[:n_param], param_range))

  data_dir = os.environ['DEEPCHEM_DATA_DIR']
  log_file = os.path.join(data_dir, log_file)

  def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0, l09=0,
        l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0, l18=0, l19=0):
    """Optimizing function.

    Takes in hyperparameter values and returns the valid set performance.

    Parameters
    ----------
    l00~l19: int or float
      placeholders for the hyperparameters being optimized; the
      hyper_parameters dict is rebuilt from these placeholder values

    Returns
    -------
    valid_scores: float
      valid set performance
    """
    args = locals()
    # Input hyper parameters
    i = 0
    for hp in hp_list_single:
      hyper_parameters[hp] = float(args[param_name[i]])
      if param_range[i][0] == 'int':
        hyper_parameters[hp] = int(hyper_parameters[hp])
      i = i + 1
    for hp in hp_list_multiple:
      hyper_parameters[hp[0]] = [
          float(args[param_name[j]]) for j in range(i, i + hp[1])
      ]
      if param_range[i][0] == 'int':
        hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
      i = i + hp[1]

    logger.info(hyper_parameters)
    # Run benchmark
    with open(log_file, 'a') as f:
      # Record hyperparameters
      f.write(str(hyper_parameters))
      f.write('\n')
    if isinstance(self.model_class, str):
      try:
        train_scores, valid_scores, _ = benchmark_classification(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=hyper_parameters)
      except AssertionError:
        train_scores, valid_scores, _ = benchmark_regression(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=hyper_parameters)
      score = valid_scores[self.model_class][metric[0].name]
    else:
      model_dir = tempfile.mkdtemp()
      model = self.model_class(hyper_parameters, model_dir)
      model.fit(train_dataset, **hyper_parameters)
      model.save()
      evaluator = Evaluator(model, valid_dataset, output_transformers)
      multitask_scores = evaluator.compute_model_performance(metric)
      score = multitask_scores[metric[0].name]

    with open(log_file, 'a') as f:
      # Record performances
      f.write(str(score))
      f.write('\n')
    # GPGO maximizes performance by default; return the negative value
    # for minimization
    if direction:
      return score
    else:
      return -score

  import pyGPGO
  from pyGPGO.covfunc import matern32
  from pyGPGO.acquisition import Acquisition
  from pyGPGO.surrogates.GaussianProcess import GaussianProcess
  from pyGPGO.GPGO import GPGO
  cov = matern32()
  gp = GaussianProcess(cov)
  acq = Acquisition(mode='ExpectedImprovement')
  gpgo = GPGO(gp, acq, f, param)
  logger.info("Max number of iteration: %i" % max_iter)
  gpgo.run(max_iter=max_iter)

  hp_opt, valid_performance_opt = gpgo.getResult()

  # Readout best hyper parameters
  i = 0
  for hp in hp_list_single:
    hyper_parameters[hp] = float(hp_opt[param_name[i]])
    if param_range[i][0] == 'int':
      hyper_parameters[hp] = int(hyper_parameters[hp])
    i = i + 1
  for hp in hp_list_multiple:
    hyper_parameters[hp[0]] = [
        float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
    ]
    if param_range[i][0] == 'int':
      hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
    i = i + hp[1]

  # Compare best model to default hyperparameters
  with open(log_file, 'a') as f:
    # Record hyperparameters
    f.write(str(params_dict))
    f.write('\n')
  if isinstance(self.model_class, str):
    try:
      train_scores, valid_scores, _ = benchmark_classification(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=params_dict)
    except AssertionError:
      train_scores, valid_scores, _ = benchmark_regression(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=params_dict)
    score = valid_scores[self.model_class][metric[0].name]
    with open(log_file, 'a') as f:
      # Record performances
      f.write(str(score))
      f.write('\n')
    if not direction:
      score = -score
    if score > valid_performance_opt:
      # Optimized model is better, return hyperparameters
      return params_dict, score

  # Return default hyperparameters
  return hyper_parameters, valid_performance_opt
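# Illustrative sketch of invoking the Gaussian process search directly,
# mirroring the call made by run_benchmark when hyper_param_search=True.
# Assumes deepchem is installed and DEEPCHEM_DATA_DIR is set (the search
# writes its log file there); starting values come from the preset
# hyperparameters referenced in the docstring above.
import numpy as np
import deepchem
from deepchem.molnet.preset_hyper_parameters import hps

tasks, (train, valid, test), transformers = deepchem.molnet.load_tox21(
    featurizer='ECFP', split='random')
metric = [deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean)]

searcher = deepchem.hyper.GaussianProcessHyperparamOpt('tf')
best_hyper_parameters, best_valid_score = searcher.hyperparam_search(
    hps['tf'],
    train,
    valid,
    transformers,
    metric,
    direction=True,       # maximize ROC-AUC
    n_features=1024,
    n_tasks=len(tasks),
    max_iter=20,
    search_range=4)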
def run_benchmark(datasets, model, split=None, metric=None, featurizer=None, n_features=0, out_path='.', hyper_parameters=None, test=False, seed=123): """ Run benchmark test on designated datasets with deepchem(or user-defined) model Parameters ---------- datasets: list of string choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl, clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast model: string or user-defined model stucture choice of which model to use, deepchem provides implementation of logistic regression, random forest, multitask network, bypass multitask network, irv, graph convolution; for user define model, it should include function: fit, evaluate split: string, optional (default=None) choice of splitter function, None = using the default splitter metric: string, optional (default=None) choice of evaluation metrics, None = using the default metrics(AUC & R2) featurizer: string or dc.feat.Featurizer, optional (default=None) choice of featurization, None = using the default corresponding to model (string only applicable to deepchem models) n_features: int, optional(default=0) depending on featurizers, redefined when using deepchem featurizers, need to be specified for user-defined featurizers(if using deepchem models) out_path: string, optional(default='.') path of result file hyper_parameters: dict, optional (default=None) hyper parameters for designated model, None = use preset values test: boolean, optional(default=False) whether to evaluate on test set """ for dataset in datasets: if dataset in [ 'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'sider', 'tox21', 'toxcast' ]: mode = 'classification' if metric == None: metric = str('auc') elif dataset in [ 'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl' ]: mode = 'regression' if metric == None: metric = str('r2') else: raise ValueError('Dataset not supported') metric_all = { 'auc': deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean), 'r2': deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score, np.mean) } if isinstance(metric, str): metric = [metric_all[metric]] if featurizer == None and isinstance(model, str): # Assigning featurizer if not user defined pair = (dataset, model) if pair in CheckFeaturizer: featurizer = CheckFeaturizer[pair][0] n_features = CheckFeaturizer[pair][1] else: continue if not split in [None] + CheckSplit[dataset]: continue loading_functions = { 'bace_c': deepchem.molnet.load_bace_classification, 'bace_r': deepchem.molnet.load_bace_regression, 'bbbp': deepchem.molnet.load_bbbp, 'chembl': deepchem.molnet.load_chembl, 'clearance': deepchem.molnet.load_clearance, 'clintox': deepchem.molnet.load_clintox, 'delaney': deepchem.molnet.load_delaney, 'hiv': deepchem.molnet.load_hiv, 'hopv': deepchem.molnet.load_hopv, 'kaggle': deepchem.molnet.load_kaggle, 'lipo': deepchem.molnet.load_lipo, 'muv': deepchem.molnet.load_muv, 'nci': deepchem.molnet.load_nci, 'pcba': deepchem.molnet.load_pcba, 'pdbbind': deepchem.molnet.load_pdbbind_grid, 'ppb': deepchem.molnet.load_ppb, 'qm7': deepchem.molnet.load_qm7_from_mat, 'qm7b': deepchem.molnet.load_qm7b_from_mat, 'qm8': deepchem.molnet.load_qm8, 'qm9': deepchem.molnet.load_qm9, 'sampl': deepchem.molnet.load_sampl, 'sider': deepchem.molnet.load_sider, 'tox21': deepchem.molnet.load_tox21, 'toxcast': deepchem.molnet.load_toxcast } print('-------------------------------------') 
print('Benchmark on dataset: %s' % dataset) print('-------------------------------------') # loading datasets if split is not None: print('Splitting function: %s' % split) tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, split=split) else: tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer) train_dataset, valid_dataset, test_dataset = all_dataset time_start_fitting = time.time() train_score = {} valid_score = {} test_score = {} if isinstance(model, str): if mode == 'classification': train_score, valid_score, test_score = benchmark_classification( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) elif mode == 'regression': train_score, valid_score, test_score = benchmark_regression( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) else: model.fit(train_dataset) train_score['user_defined'] = model.evaluate(train_dataset, metric, transformers) valid_score['user_defined'] = model.evaluate(valid_dataset, metric, transformers) if test: test_score['user_defined'] = model.evaluate(test_dataset, metric, transformers) time_finish_fitting = time.time() with open(os.path.join(out_path, 'results.csv'), 'a') as f: writer = csv.writer(f) for i in train_score: output_line = [ dataset, str(split), mode, 'train', i, train_score[i][list(train_score[i].keys())[0]], 'valid', i, valid_score[i][list(valid_score[i].keys())[0]] ] if test: output_line.extend( ['test', i, test_score[i][list(test_score[i].keys())[0]]]) output_line.extend( ['time_for_running', time_finish_fitting - time_start_fitting]) writer.writerow(output_line)
              test,
              tasks,
              transformers,
              n_features,
              metric,
              model,
              test=False,
              hyper_parameters=hyper_parameters,
              seed=seed)
        elif mode == 'regression':
          train_score, valid_score, test_score = benchmark_regression(
              train,
              valid,
              test,
              tasks,
              transformers,
              n_features,
              metric,
              model,
              test=False,
              hyper_parameters=hyper_parameters,
              seed=seed)
        with open(
            os.path.join(out_path, 'results_frac_train_curve.csv'), 'a') as f:
          writer = csv.writer(f)
          model_name = list(train_score.keys())[0]
          for i in train_score[model_name]:
            output_line = [
                dataset,
                str(split), mode, model_name, i, 'train',
                train_score[model_name][i], 'valid',
def hyperparam_search( self, params_dict, train_dataset, valid_dataset, output_transformers, metric, direction=True, n_features=1024, n_tasks=1, max_iter=20, search_range=4, hp_invalid_list=[ 'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts', 'n_pair_feat', 'fit_transformers', 'min_child_weight', 'max_delta_step', 'subsample', 'colsample_bylevel', 'colsample_bytree', 'reg_alpha', 'reg_lambda', 'scale_pos_weight', 'base_score' ], log_file='GPhypersearch.log'): """Perform hyperparams search using a gaussian process assumption params_dict include single-valued parameters being optimized, which should only contain int, float and list of int(float) parameters with names in hp_invalid_list will not be changed. For Molnet models, self.model_class is model name in string, params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class] Parameters ---------- params_dict: dict dict including parameters and their initial values parameters not suitable for optimization can be added to hp_invalid_list train_dataset: dc.data.Dataset struct dataset used for training valid_dataset: dc.data.Dataset struct dataset used for validation(optimization on valid scores) output_transformers: list of dc.trans.Transformer transformers for evaluation metric: list of dc.metrics.Metric metric used for evaluation direction: bool maximization(True) or minimization(False) n_features: int number of input features n_tasks: int number of tasks max_iter: int number of optimization trials search_range: int(float) optimization on [initial values / search_range, initial values * search_range] hp_invalid_list: list names of parameters that should not be optimized logfile: string name of log file, hyperparameters and results for each trial will be recorded Returns ------- hyper_parameters: dict params_dict with all optimized values valid_performance_opt: float best performance on valid dataset """ assert len(metric) == 1, 'Only use one metric' hyper_parameters = params_dict hp_list = list(hyper_parameters.keys()) for hp in hp_invalid_list: if hp in hp_list: hp_list.remove(hp) hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list] assert set(hp_list_class) <= set([list, int, float]) # Float or int hyper parameters(ex. batch_size, learning_rate) hp_list_single = [ hp_list[i] for i in range(len(hp_list)) if not hp_list_class[i] is list ] # List of float or int hyper parameters(ex. 
layer_sizes) hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]])) for i in range(len(hp_list)) if hp_list_class[i] is list] # Number of parameters n_param = len(hp_list_single) if len(hp_list_multiple) > 0: n_param = n_param + sum([hp[1] for hp in hp_list_multiple]) # Range of optimization param_range = [] for hp in hp_list_single: if hyper_parameters[hp].__class__ is int: param_range.append((('int'), [ hyper_parameters[hp] // search_range, hyper_parameters[hp] * search_range ])) else: param_range.append((('cont'), [ hyper_parameters[hp] / search_range, hyper_parameters[hp] * search_range ])) for hp in hp_list_multiple: if hyper_parameters[hp[0]][0].__class__ is int: param_range.extend([(('int'), [ hyper_parameters[hp[0]][i] // search_range, hyper_parameters[hp[0]][i] * search_range ]) for i in range(hp[1])]) else: param_range.extend([(('cont'), [ hyper_parameters[hp[0]][i] / search_range, hyper_parameters[hp[0]][i] * search_range ]) for i in range(hp[1])]) # Dummy names param_name = ['l' + format(i, '02d') for i in range(20)] param = dict(zip(param_name[:n_param], param_range)) data_dir = os.environ['DEEPCHEM_DATA_DIR'] log_file = os.path.join(data_dir, log_file) def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0, l09=0, l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0, l18=0, l19=0): """ Optimizing function Take in hyper parameter values and return valid set performances Parameters ---------- l00~l19: int or float placeholders for hyperparameters being optimized, hyper_parameters dict is rebuilt based on input values of placeholders Returns: -------- valid_scores: float valid set performances """ args = locals() # Input hyper parameters i = 0 for hp in hp_list_single: hyper_parameters[hp] = float(args[param_name[i]]) if param_range[i][0] == 'int': hyper_parameters[hp] = int(hyper_parameters[hp]) i = i + 1 for hp in hp_list_multiple: hyper_parameters[hp[0]] = [ float(args[param_name[j]]) for j in range(i, i + hp[1]) ] if param_range[i][0] == 'int': hyper_parameters[hp[0]] = map(int, hyper_parameters[hp[0]]) i = i + hp[1] logger.info(hyper_parameters) # Run benchmark with open(log_file, 'a') as f: # Record hyperparameters f.write(str(hyper_parameters)) f.write('\n') if isinstance(self.model_class, str) or isinstance( self.model_class, unicode): try: train_scores, valid_scores, _ = benchmark_classification( train_dataset, valid_dataset, valid_dataset, ['task_placeholder'] * n_tasks, output_transformers, n_features, metric, self.model_class, hyper_parameters=hyper_parameters) except AssertionError: train_scores, valid_scores, _ = benchmark_regression( train_dataset, valid_dataset, valid_dataset, ['task_placeholder'] * n_tasks, output_transformers, n_features, metric, self.model_class, hyper_parameters=hyper_parameters) score = valid_scores[self.model_class][metric[0].name] else: model_dir = tempfile.mkdtemp() model = self.model_class(hyper_parameters, model_dir) model.fit(train_dataset, **hyper_parameters) model.save() evaluator = Evaluator(model, valid_dataset, output_transformers) multitask_scores = evaluator.compute_model_performance(metric) score = multitask_scores[metric[0].name] with open(log_file, 'a') as f: # Record performances f.write(str(score)) f.write('\n') # GPGO maximize performance by default, set performance to its negative value for minimization if direction: return score else: return -score import pyGPGO from pyGPGO.covfunc import matern32 from pyGPGO.acquisition import Acquisition from pyGPGO.surrogates.GaussianProcess import 
GaussianProcess from pyGPGO.GPGO import GPGO cov = matern32() gp = GaussianProcess(cov) acq = Acquisition(mode='ExpectedImprovement') gpgo = GPGO(gp, acq, f, param) logger.info("Max number of iteration: %i" % max_iter) gpgo.run(max_iter=max_iter) hp_opt, valid_performance_opt = gpgo.getResult() # Readout best hyper parameters i = 0 for hp in hp_list_single: hyper_parameters[hp] = float(hp_opt[param_name[i]]) if param_range[i][0] == 'int': hyper_parameters[hp] = int(hyper_parameters[hp]) i = i + 1 for hp in hp_list_multiple: hyper_parameters[hp[0]] = [ float(hp_opt[param_name[j]]) for j in range(i, i + hp[1]) ] if param_range[i][0] == 'int': hyper_parameters[hp[0]] = map(int, hyper_parameters[hp[0]]) i = i + hp[1] # Compare best model to default hyperparameters with open(log_file, 'a') as f: # Record hyperparameters f.write(str(params_dict)) f.write('\n') if isinstance(self.model_class, str) or isinstance(self.model_class, unicode): try: train_scores, valid_scores, _ = benchmark_classification( train_dataset, valid_dataset, valid_dataset, ['task_placeholder'] * n_tasks, output_transformers, n_features, metric, self.model_class, hyper_parameters=params_dict) except AssertionError: train_scores, valid_scores, _ = benchmark_regression( train_dataset, valid_dataset, valid_dataset, ['task_placeholder'] * n_tasks, output_transformers, n_features, metric, self.model_class, hyper_parameters=params_dict) score = valid_scores[self.model_class][metric[0].name] with open(log_file, 'a') as f: # Record performances f.write(str(score)) f.write('\n') if not direction: score = -score if score > valid_performance_opt: # Optimized model is better, return hyperparameters return params_dict, score # Return default hyperparameters return hyper_parameters, valid_performance_opt
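# Standalone sketch of the pyGPGO loop used above, run on a toy objective.
# It shows the ('int' / 'cont', [low, high]) parameter format that
# hyperparam_search builds for its l00..l19 placeholders. Assumes pyGPGO
# is installed; the objective and bounds are made up for illustration.
from pyGPGO.covfunc import matern32
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.GaussianProcess import GaussianProcess
from pyGPGO.GPGO import GPGO


def toy_objective(x=0.0, n=1):
  # GPGO maximizes, so the optimum is at x = 2.0, n = 4
  return -(x - 2.0)**2 + n


toy_param = {
    'x': ('cont', [0.0, 5.0]),  # continuous parameter
    'n': ('int', [1, 4]),       # integer parameter
}
gpgo = GPGO(
    GaussianProcess(matern32()), Acquisition(mode='ExpectedImprovement'),
    toy_objective, toy_param)
gpgo.run(max_iter=10)
best_params, best_value = gpgo.getResult()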
def f(l00=0, l01=0, l02=0, l03=0, l04=0, l05=0, l06=0, l07=0, l08=0, l09=0,
      l10=0, l11=0, l12=0, l13=0, l14=0, l15=0, l16=0, l17=0, l18=0, l19=0):
  """Optimizing function.

  Takes in hyperparameter values and returns the valid set performance.

  Parameters
  ----------
  l00~l19: int or float
    placeholders for the hyperparameters being optimized; the
    hyper_parameters dict is rebuilt from these placeholder values

  Returns
  -------
  valid_scores: float
    valid set performance
  """
  args = locals()
  # Input hyper parameters
  i = 0
  for hp in hp_list_single:
    hyper_parameters[hp] = float(args[param_name[i]])
    if param_range[i][0] == 'int':
      hyper_parameters[hp] = int(hyper_parameters[hp])
    i = i + 1
  for hp in hp_list_multiple:
    hyper_parameters[hp[0]] = [
        float(args[param_name[j]]) for j in range(i, i + hp[1])
    ]
    if param_range[i][0] == 'int':
      hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
    i = i + hp[1]

  logger.info(hyper_parameters)
  # Run benchmark
  with open(log_file, 'a') as f:
    # Record hyperparameters
    f.write(str(hyper_parameters))
    f.write('\n')
  if isinstance(self.model_class, str):
    try:
      train_scores, valid_scores, _ = benchmark_classification(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=hyper_parameters)
    except AssertionError:
      train_scores, valid_scores, _ = benchmark_regression(
          train_dataset,
          valid_dataset,
          valid_dataset, ['task_placeholder'] * n_tasks,
          output_transformers,
          n_features,
          metric,
          self.model_class,
          hyper_parameters=hyper_parameters)
    score = valid_scores[self.model_class][metric[0].name]
  else:
    model_dir = tempfile.mkdtemp()
    model = self.model_class(hyper_parameters, model_dir)
    model.fit(train_dataset, **hyper_parameters)
    model.save()
    evaluator = Evaluator(model, valid_dataset, output_transformers)
    multitask_scores = evaluator.compute_model_performance(metric)
    score = multitask_scores[metric[0].name]

  with open(log_file, 'a') as f:
    # Record performances
    f.write(str(score))
    f.write('\n')
  # GPGO maximizes performance by default; return the negative value
  # for minimization
  if direction:
    return score
  else:
    return -score
def run_benchmark(datasets, model, split=None, metric=None, featurizer=None, out_path='.', test=False): """ Run benchmark test on designated datasets with deepchem(or user-defined) model Parameters ---------- datasets: list of string choice of which datasets to use, should be: tox21, muv, sider, toxcast, pcba, delaney, kaggle, nci, clintox, hiv, pdbbind, chembl, qm7, qm7b, qm9, sampl model: string or user-defined model stucture choice of which model to use, deepchem provides implementation of logistic regression, random forest, multitask network, bypass multitask network, irv, graph convolution; for user define model, it should include function: fit, evaluate split: string, optional (default=None) choice of splitter function, None = using the default splitter metric: string, optional (default=None) choice of evaluation metrics, None = using the default metrics(AUC & R2) featurizer: string or dc.feat.Featurizer, optional (default=None) choice of featurization, None = using the default corresponding to model (string only applicable to deepchem models) out_path: string, optional(default='.') path of result file test: boolean, optional(default=False) whether to evaluate on test set """ for dataset in datasets: if dataset in [ 'muv', 'pcba', 'tox21', 'sider', 'toxcast', 'clintox', 'hiv' ]: mode = 'classification' if metric == None: metric = [dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)] elif dataset in [ 'kaggle', 'delaney', 'nci', 'pdbbind', 'chembl', 'qm7', 'qm7b', 'qm9', 'sampl' ]: mode = 'regression' if metric == None: metric = [ dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean) ] else: raise ValueError('Dataset not supported') if featurizer == None: # Assigning featurizer if not user defined if model in ['graphconv', 'graphconvreg']: featurizer = 'GraphConv' n_features = 75 elif model in [ 'tf', 'tf_robust', 'logreg', 'rf', 'irv', 'tf_regression', 'rf_regression' ]: featurizer = 'ECFP' n_features = 1024 else: raise ValueError( 'featurization should be specified for user-defined models' ) # Some exceptions in datasets if dataset in ['kaggle']: featurizer = None # kaggle dataset is already featurized if isinstance(model, str) and not model in [ 'tf_regression', 'rf_regression' ]: return if split in ['scaffold', 'butina', 'random']: return elif dataset in ['qm7', 'qm7b', 'qm9']: featurizer = None # qm* datasets are already featurized if isinstance(model, str) and not model in ['tf_regression']: return elif model in ['tf_regression']: model = 'tf_regression_ft' if split in ['scaffold', 'butina']: return elif dataset in ['pdbbind']: featurizer = 'grid' # pdbbind accepts grid featurizer if isinstance(model, str) and not model in [ 'tf_regression', 'rf_regression' ]: return if split in ['scaffold', 'butina']: return if not split in [ None, 'index', 'random', 'scaffold', 'butina', 'stratified' ]: raise ValueError('Splitter function not supported') loading_functions = { 'tox21': dc.molnet.load_tox21, 'muv': dc.molnet.load_muv, 'pcba': dc.molnet.load_pcba, 'nci': dc.molnet.load_nci, 'sider': dc.molnet.load_sider, 'toxcast': dc.molnet.load_toxcast, 'kaggle': dc.molnet.load_kaggle, 'delaney': dc.molnet.load_delaney, 'pdbbind': dc.molnet.load_pdbbind_grid, 'chembl': dc.molnet.load_chembl, 'qm7': dc.molnet.load_qm7_from_mat, 'qm7b': dc.molnet.load_qm7b_from_mat, 'qm9': dc.molnet.load_qm9, 'sampl': dc.molnet.load_sampl, 'clintox': dc.molnet.load_clintox, 'hiv': dc.molnet.load_hiv } print('-------------------------------------') print('Benchmark on dataset: %s' % dataset) 
print('-------------------------------------') # loading datasets if split is not None: print('Splitting function: %s' % split) tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, split=split) else: tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer) train_dataset, valid_dataset, test_dataset = all_dataset if dataset in ['kaggle', 'pdbbind']: n_features = train_dataset.get_data_shape()[0] elif dataset in ['qm7', 'qm7b', 'qm9']: n_features = list(train_dataset.get_data_shape()) time_start_fitting = time.time() train_scores = {} valid_scores = {} test_scores = {} if isinstance(model, str): if mode == 'classification': train_score, valid_score, test_score = benchmark_classification( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model=model, test=test) elif mode == 'regression': train_score, valid_score, test_score = benchmark_regression( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model=model, test=test) else: model.fit(train_dataset) train_scores['user_defined'] = model.evaluate( train_dataset, metric, transformers) valid_scores['user_defined'] = model.evaluate( valid_dataset, metric, transformers) if test: test_scores['user_defined'] = model.evaluate( test_dataset, metric, transformers) time_finish_fitting = time.time() with open(os.path.join(out_path, 'results.csv'), 'a') as f: writer = csv.writer(f) for i in train_score: output_line = [ dataset, str(split), mode, 'train', i, train_score[i][train_score[i].keys()[0]], 'valid', i, valid_score[i][valid_score[i].keys()[0]] ] if test: output_line.extend( ['test', i, test_score[i][test_score[i].keys()[0]]]) output_line.extend([ 'time_for_running', time_finish_fitting - time_start_fitting ]) writer.writerow(output_line)
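# Illustrative sketch (not part of the original code) of reading back the
# rows appended to results.csv by the writer directly above. No header row
# is written, and the column layout follows the output_line list above:
# dataset, split, mode, 'train', model, train score, 'valid', model,
# valid score, then optional test fields and the running time.
import csv

with open('results.csv') as f:
  for row in csv.reader(f):
    dataset, split, mode = row[0], row[1], row[2]
    train_value = float(row[5])
    valid_value = float(row[8])
    print('%s (%s, %s): train=%.4f valid=%.4f' %
          (dataset, split, mode, train_value, valid_value))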
def run_benchmark(ckpt, arg, datasets, model, split=None, metric=None, direction=True, featurizer=None, n_features=0, out_path='.', hyper_parameters=None, hyper_param_search=False, max_iter=20, search_range=2, test=False, reload=True, seed=123): """ Run benchmark test on designated datasets with deepchem(or user-defined) model Parameters ---------- datasets: list of string choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl, clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast model: string or user-defined model stucture choice of which model to use, deepchem provides implementation of logistic regression, random forest, multitask network, bypass multitask network, irv, graph convolution; for user define model, it should include function: fit, evaluate split: string, optional (default=None) choice of splitter function, None = using the default splitter metric: string, optional (default=None) choice of evaluation metrics, None = using the default metrics(AUC & R2) direction: bool, optional(default=True) Optimization direction when doing hyperparameter search Maximization(True) or minimization(False) featurizer: string or dc.feat.Featurizer, optional (default=None) choice of featurization, None = using the default corresponding to model (string only applicable to deepchem models) n_features: int, optional(default=0) depending on featurizers, redefined when using deepchem featurizers, need to be specified for user-defined featurizers(if using deepchem models) out_path: string, optional(default='.') path of result file hyper_parameters: dict, optional (default=None) hyper parameters for designated model, None = use preset values hyper_param_search: bool, optional(default=False) whether to perform hyper parameter search, using gaussian process by default max_iter: int, optional(default=20) number of optimization trials search_range: int(float), optional(default=4) optimization on [initial values / search_range, initial values * search_range] test: boolean, optional(default=False) whether to evaluate on test set reload: boolean, optional(default=True) whether to save and reload featurized datasets """ benchmark_data = {} for dataset in datasets: if dataset in [ 'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'pcba_146', 'pcba_2475', 'sider', 'tox21', 'toxcast' ]: mode = 'classification' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean), ] elif dataset in [ 'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl' ]: mode = 'regression' if metric == None: metric = [ deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score, np.mean) ] else: raise ValueError('Dataset not supported') if featurizer == None and isinstance(model, str): # Assigning featurizer if not user defined pair = (dataset, model) if pair in CheckFeaturizer: featurizer = CheckFeaturizer[pair][0] n_features = CheckFeaturizer[pair][1] else: continue if not split in [None] + CheckSplit[dataset]: continue loading_functions = { 'bace_c': deepchem.molnet.load_bace_classification, 'bace_r': deepchem.molnet.load_bace_regression, 'bbbp': deepchem.molnet.load_bbbp, 'chembl': deepchem.molnet.load_chembl, 'clearance': deepchem.molnet.load_clearance, 'clintox': deepchem.molnet.load_clintox, 'delaney': deepchem.molnet.load_delaney, 'hiv': deepchem.molnet.load_hiv, 'hopv': deepchem.molnet.load_hopv, 'kaggle': 
deepchem.molnet.load_kaggle, 'lipo': deepchem.molnet.load_lipo, 'muv': deepchem.molnet.load_muv, 'nci': deepchem.molnet.load_nci, 'pcba': deepchem.molnet.load_pcba, 'pcba_146': deepchem.molnet.load_pcba_146, 'pcba_2475': deepchem.molnet.load_pcba_2475, 'pdbbind': deepchem.molnet.load_pdbbind_grid, 'ppb': deepchem.molnet.load_ppb, 'qm7': deepchem.molnet.load_qm7_from_mat, 'qm7b': deepchem.molnet.load_qm7b_from_mat, 'qm8': deepchem.molnet.load_qm8, 'qm9': deepchem.molnet.load_qm9, 'sampl': deepchem.molnet.load_sampl, 'sider': deepchem.molnet.load_sider, 'tox21': deepchem.molnet.load_tox21, 'toxcast': deepchem.molnet.load_toxcast } print('-------------------------------------') print('Benchmark on dataset: %s' % dataset) print('-------------------------------------') # loading datasets if split is not None: print('Splitting function: %s' % split) tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, split=split, reload=reload) else: tasks, all_dataset, transformers = loading_functions[dataset]( featurizer=featurizer, reload=reload) train_dataset, valid_dataset, test_dataset = all_dataset time_start_fitting = time.time() train_score = {} valid_score = {} test_score = {} if hyper_param_search: if hyper_parameters is None: hyper_parameters = hps[model] search_mode = deepchem.hyper.GaussianProcessHyperparamOpt(model) hyper_param_opt, _ = search_mode.hyperparam_search( hyper_parameters, train_dataset, valid_dataset, transformers, metric, direction=direction, n_features=n_features, n_tasks=len(tasks), max_iter=max_iter, search_range=search_range) hyper_parameters = hyper_param_opt if isinstance(model, str): if mode == 'classification': train_score, valid_score, test_score = benchmark_classification( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) elif mode == 'regression': train_score, valid_score, test_score = benchmark_regression( train_dataset, valid_dataset, test_dataset, tasks, transformers, n_features, metric, model, test=test, hyper_parameters=hyper_parameters, seed=seed) else: model.fit(train_dataset) train_score['user_defined'] = model.evaluate( train_dataset, metric, transformers) valid_score['user_defined'] = model.evaluate( valid_dataset, metric, transformers) if test: test_score['user_defined'] = model.evaluate( test_dataset, metric, transformers) time_finish_fitting = time.time() with open(os.path.join(out_path, 'results.csv'), 'a') as f: writer = csv.writer(f) model_name = list(train_score.keys())[0] for i in train_score[model_name]: output_line = [ str(dataset), '_', str(model), '_', str(featurizer), '/n' ] output_line.extend([ dataset, str(split), mode, model_name, i, 'train', train_score[model_name][i], 'valid', valid_score[model_name][i] ]) if test: output_line.extend(['test', test_score[model_name][i]]) output_line.extend([ 'time_for_running', time_finish_fitting - time_start_fitting ]) writer.writerow(output_line) if hyper_param_search: with open(os.path.join(out_path, dataset + model + '.pkl'), 'w') as f: pickle.dump(hyper_parameters, f) # Logging Experiment Result benchmark_data = {} print('#########################') print(featurizer.__class__.__qualname__) print('##########################') if 'Comet' in str(featurizer.__class__.__qualname__): print('in comet') ckpt_model = '{}_{}'.format('_'.join(ckpt.split('_')[:-2]), n_features) elif 'RandFeat' in str(featurizer.__class__.__qualname__): print('in rand') ckpt_model = '{}_{}'.format('Rand', 
                                  n_features)
      ckpt = 'rand'
    else:
      print('not comet')
      if ckpt == 'fingerprint':
        ckpt_model = '{}_{}'.format(ckpt, n_features)

    benchmark_data.update(hyper_parameters)
    benchmark_data = {
        'ckpt': '{}_{}'.format(ckpt_model, model),
        'task': dataset,
        'model': model_name,
        'train_score': train_score[model_name][i],
        'val_score': valid_score[model_name][i],
        'test_score': test_score[model_name][i]
    }
    benchmark_data.update(arg)
    result_filename = '{}.json'.format(ckpt_model)
    exp_name = '{}_{}_{}'.format(ckpt, dataset, model)

    from os.path import join, isfile
    list_files = [
        f for f in os.listdir(out_path) if isfile(join(out_path, f))
    ]
    if result_filename in list_files:
      with open(os.path.join(out_path, result_filename), 'r+') as outfile:
        temp = json.load(outfile)
        temp.update({exp_name: benchmark_data})
        outfile.seek(0)
        json.dump(temp, outfile)
        outfile.truncate()
    else:
      with open(os.path.join(out_path, result_filename), 'w+') as outfile:
        temp = {exp_name: benchmark_data}
        json.dump(temp, outfile)
    print('Result Saved at {}'.format(
        os.path.join(out_path, result_filename)))
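# Illustrative sketch of reading back the per-experiment JSON written by the
# block above. The file name ('fingerprint_1024.json') is a made-up example
# of the '{ckpt_model}.json' pattern; the record keys follow the
# benchmark_data dict constructed above.
import json

with open('fingerprint_1024.json') as infile:
  results = json.load(infile)
for exp_name, record in results.items():
  print(exp_name, record['task'], record['model'],
        'valid=%.4f test=%.4f' % (record['val_score'], record['test_score']))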