def f(l00=0,
              l01=0,
              l02=0,
              l03=0,
              l04=0,
              l05=0,
              l06=0,
              l07=0,
              l08=0,
              l09=0,
              l10=0,
              l11=0,
              l12=0,
              l13=0,
              l14=0,
              l15=0,
              l16=0,
              l17=0,
              l18=0,
              l19=0):
            """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
            args = locals()
            # Input hyper parameters
            i = 0
            for hp in hp_list_single:
                hyper_parameters[hp] = float(args[param_name[i]])
                if param_range[i][0] == 'int':
                    hyper_parameters[hp] = int(hyper_parameters[hp])
                i = i + 1
            for hp in hp_list_multiple:
                hyper_parameters[hp[0]] = [
                    float(args[param_name[j]]) for j in range(i, i + hp[1])
                ]
                if param_range[i][0] == 'int':
                    hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
                i = i + hp[1]

            print(hyper_parameters)
            # Run benchmark
            if isinstance(self.model_class, str):
                try:
                    train_scores, valid_scores, _ = benchmark_classification(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                except AssertionError:
                    train_scores, valid_scores, _ = benchmark_regression(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                return valid_scores[self.model_class][metric[0].name]
            else:
                model_dir = tempfile.mkdtemp()
                model = self.model_class(hyper_parameters, model_dir)
                model.fit(train_dataset, **hyper_parameters)
                model.save()
                evaluator = Evaluator(model, valid_dataset,
                                      output_transformers)
                multitask_scores = evaluator.compute_model_performance(metric)
                return multitask_scores[metric[0].name]
Example #2
def run_benchmark(datasets,
                  model,
                  split=None,
                  metric=None,
                  direction=True,
                  featurizer=None,
                  n_features=0,
                  out_path='.',
                  hyper_parameters=None,
                  hyper_param_search=False,
                  max_iter=20,
                  search_range=2,
                  test=False,
                  reload=True,
                  seed=123):
  """
  Run benchmark tests on the designated datasets with a deepchem (or user-defined) model

  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl,
      clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba,
      pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast, uv, factors,
      kinase
  model: string or user-defined model structure
      choice of which model to use, deepchem provides implementation of
      logistic regression, random forest, multitask network,
      bypass multitask network, irv, graph convolution;
      for a user-defined model, it should implement fit and evaluate functions
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string, optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  direction: bool, optional(default=True)
      Optimization direction when doing hyperparameter search
      Maximization(True) or minimization(False)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
  n_features: int, optional(default=0)
      depending on featurizers, redefined when using deepchem featurizers,
      need to be specified for user-defined featurizers(if using deepchem models)
  out_path: string, optional(default='.')
      path of result file
  hyper_parameters: dict, optional (default=None)
      hyper parameters for designated model, None = use preset values
  hyper_param_search: bool, optional(default=False)
      whether to perform hyper parameter search, using gaussian process by default
  max_iter: int, optional(default=20)
      number of optimization trials
  search_range: int or float, optional (default=2)
      optimization on [initial values / search_range,
                       initial values * search_range]
  test: boolean, optional(default=False)
      whether to evaluate on test set
  reload: boolean, optional(default=True)
      whether to save and reload featurized datasets
  """
  for dataset in datasets:
    if dataset in [
        'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'pcba_146',
        'pcba_2475', 'sider', 'tox21', 'toxcast'
    ]:
      mode = 'classification'
      if metric is None:
        metric = [
            deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean),
        ]
    elif dataset in [
        'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo',
        'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl'
    ]:
      mode = 'regression'
      if metric is None:
        metric = [
            deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score, np.mean)
        ]
    else:
      raise ValueError('Dataset not supported')

    if featurizer is None and isinstance(model, str):
      # Assigning featurizer if not user defined
      pair = (dataset, model)
      if pair in CheckFeaturizer:
        featurizer = CheckFeaturizer[pair][0]
        n_features = CheckFeaturizer[pair][1]
      else:
        continue

    if split not in [None] + CheckSplit[dataset]:
      continue

    loading_functions = {
        'bace_c': deepchem.molnet.load_bace_classification,
        'bace_r': deepchem.molnet.load_bace_regression,
        'bbbp': deepchem.molnet.load_bbbp,
        'chembl': deepchem.molnet.load_chembl,
        'clearance': deepchem.molnet.load_clearance,
        'clintox': deepchem.molnet.load_clintox,
        'delaney': deepchem.molnet.load_delaney,
        'factors': deepchem.molnet.load_factors,
        'hiv': deepchem.molnet.load_hiv,
        'hopv': deepchem.molnet.load_hopv,
        'kaggle': deepchem.molnet.load_kaggle,
        'kinase': deepchem.molnet.load_kinase,
        'lipo': deepchem.molnet.load_lipo,
        'muv': deepchem.molnet.load_muv,
        'nci': deepchem.molnet.load_nci,
        'pcba': deepchem.molnet.load_pcba,
        'pcba_146': deepchem.molnet.load_pcba_146,
        'pcba_2475': deepchem.molnet.load_pcba_2475,
        'pdbbind': deepchem.molnet.load_pdbbind_grid,
        'ppb': deepchem.molnet.load_ppb,
        'qm7': deepchem.molnet.load_qm7_from_mat,
        'qm7b': deepchem.molnet.load_qm7b_from_mat,
        'qm8': deepchem.molnet.load_qm8,
        'qm9': deepchem.molnet.load_qm9,
        'sampl': deepchem.molnet.load_sampl,
        'sider': deepchem.molnet.load_sider,
        'tox21': deepchem.molnet.load_tox21,
        'toxcast': deepchem.molnet.load_toxcast,
        'uv': deepchem.molnet.load_uv,
    }

    print('-------------------------------------')
    print('Benchmark on dataset: %s' % dataset)
    print('-------------------------------------')
    # loading datasets
    if split is not None:
      print('Splitting function: %s' % split)
      tasks, all_dataset, transformers = loading_functions[dataset](
          featurizer=featurizer, split=split, reload=reload)
    else:
      tasks, all_dataset, transformers = loading_functions[dataset](
          featurizer=featurizer, reload=reload)

    train_dataset, valid_dataset, test_dataset = all_dataset

    time_start_fitting = time.time()
    train_score = {}
    valid_score = {}
    test_score = {}

    if hyper_param_search:
      if hyper_parameters is None:
        hyper_parameters = hps[model]
      search_mode = deepchem.hyper.GaussianProcessHyperparamOpt(model)
      hyper_param_opt, _ = search_mode.hyperparam_search(
          hyper_parameters,
          train_dataset,
          valid_dataset,
          transformers,
          metric,
          direction=direction,
          n_features=n_features,
          n_tasks=len(tasks),
          max_iter=max_iter,
          search_range=search_range)
      hyper_parameters = hyper_param_opt
    if isinstance(model, str):
      if mode == 'classification':
        train_score, valid_score, test_score = benchmark_classification(
            train_dataset,
            valid_dataset,
            test_dataset,
            tasks,
            transformers,
            n_features,
            metric,
            model,
            test=test,
            hyper_parameters=hyper_parameters,
            seed=seed)
      elif mode == 'regression':
        train_score, valid_score, test_score = benchmark_regression(
            train_dataset,
            valid_dataset,
            test_dataset,
            tasks,
            transformers,
            n_features,
            metric,
            model,
            test=test,
            hyper_parameters=hyper_parameters,
            seed=seed)
    else:
      model.fit(train_dataset)
      train_score['user_defined'] = model.evaluate(train_dataset, metric,
                                                   transformers)
      valid_score['user_defined'] = model.evaluate(valid_dataset, metric,
                                                   transformers)
      if test:
        test_score['user_defined'] = model.evaluate(test_dataset, metric,
                                                    transformers)

    time_finish_fitting = time.time()

    with open(os.path.join(out_path, 'results.csv'), 'a') as f:
      writer = csv.writer(f)
      model_name = list(train_score.keys())[0]
      for i in train_score[model_name]:
        output_line = [
            dataset,
            str(split), mode, model_name, i, 'train',
            train_score[model_name][i], 'valid', valid_score[model_name][i]
        ]
        if test:
          output_line.extend(['test', test_score[model_name][i]])
        output_line.extend(
            ['time_for_running', time_finish_fitting - time_start_fitting])
        writer.writerow(output_line)
    if hyper_param_search:
      with open(os.path.join(out_path, dataset + model + '.pkl'), 'wb') as f:
        pickle.dump(hyper_parameters, f)
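
A minimal usage sketch (not part of the scraped source): it assumes this run_benchmark lives in a module importable as deepchem.molnet.run_benchmark and that the chosen (dataset, model) pair is accepted by CheckFeaturizer at run time.

# Hypothetical call; dataset and model names are taken from the docstring above.
from deepchem.molnet.run_benchmark import run_benchmark

run_benchmark(
    datasets=['tox21'],        # classification dataset -> default metric is ROC-AUC
    model='graphconv',         # a deepchem model name, or a user-defined model object
    split='random',
    out_path='./results',
    hyper_param_search=True,   # triggers the GaussianProcessHyperparamOpt branch
    max_iter=10,
    search_range=2)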
Example #3
def run_benchmark(datasets,
                  model,
                  split=None,
                  metric=None,
                  featurizer=None,
                  n_features=0,
                  out_path='.',
                  hyper_parameters=None,
                  test=False,
                  reload=True,
                  seed=123):
    """
  Run benchmark tests on the designated datasets with a deepchem (or user-defined) model
  
  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl,
      clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, 
      pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast 
  model: string or user-defined model structure
      choice of which model to use, deepchem provides implementation of
      logistic regression, random forest, multitask network,
      bypass multitask network, irv, graph convolution;
      for a user-defined model, it should implement fit and evaluate functions
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string,  optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
  n_features: int, optional(default=0)
      depending on featurizers, redefined when using deepchem featurizers,
      need to be specified for user-defined featurizers(if using deepchem models)
  out_path: string, optional(default='.')
      path of result file
  hyper_parameters: dict, optional (default=None)
      hyper parameters for designated model, None = use preset values
  test: boolean, optional(default=False)
      whether to evaluate on test set
  reload: boolean, optional(default=True)
      whether to save and reload featurized datasets
  """
    for dataset in datasets:
        if dataset in [
                'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'sider',
                'tox21', 'toxcast'
        ]:
            mode = 'classification'
            if metric is None:
                metric = [
                    deepchem.metrics.Metric(deepchem.metrics.roc_auc_score,
                                            np.mean),
                ]
        elif dataset in [
                'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle',
                'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9',
                'sampl'
        ]:
            mode = 'regression'
            if metric is None:
                metric = [
                    deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score,
                                            np.mean)
                ]
        else:
            raise ValueError('Dataset not supported')

        if featurizer is None and isinstance(model, str):
            # Assigning featurizer if not user defined
            pair = (dataset, model)
            if pair in CheckFeaturizer:
                featurizer = CheckFeaturizer[pair][0]
                n_features = CheckFeaturizer[pair][1]
            else:
                continue

        if split not in [None] + CheckSplit[dataset]:
            continue

        loading_functions = {
            'bace_c': deepchem.molnet.load_bace_classification,
            'bace_r': deepchem.molnet.load_bace_regression,
            'bbbp': deepchem.molnet.load_bbbp,
            'chembl': deepchem.molnet.load_chembl,
            'clearance': deepchem.molnet.load_clearance,
            'clintox': deepchem.molnet.load_clintox,
            'delaney': deepchem.molnet.load_delaney,
            'hiv': deepchem.molnet.load_hiv,
            'hopv': deepchem.molnet.load_hopv,
            'kaggle': deepchem.molnet.load_kaggle,
            'lipo': deepchem.molnet.load_lipo,
            'muv': deepchem.molnet.load_muv,
            'nci': deepchem.molnet.load_nci,
            'pcba': deepchem.molnet.load_pcba,
            'pdbbind': deepchem.molnet.load_pdbbind_grid,
            'ppb': deepchem.molnet.load_ppb,
            'qm7': deepchem.molnet.load_qm7_from_mat,
            'qm7b': deepchem.molnet.load_qm7b_from_mat,
            'qm8': deepchem.molnet.load_qm8,
            'qm9': deepchem.molnet.load_qm9,
            'sampl': deepchem.molnet.load_sampl,
            'sider': deepchem.molnet.load_sider,
            'tox21': deepchem.molnet.load_tox21,
            'toxcast': deepchem.molnet.load_toxcast
        }

        print('-------------------------------------')
        print('Benchmark on dataset: %s' % dataset)
        print('-------------------------------------')
        # loading datasets
        if split is not None:
            print('Splitting function: %s' % split)
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer, split=split, reload=reload)
        else:
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer, reload=reload)

        train_dataset, valid_dataset, test_dataset = all_dataset

        time_start_fitting = time.time()
        train_score = {}
        valid_score = {}
        test_score = {}

        if isinstance(model, str):
            if mode == 'classification':
                train_score, valid_score, test_score = benchmark_classification(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model,
                    test=test,
                    hyper_parameters=hyper_parameters,
                    seed=seed)
            elif mode == 'regression':
                train_score, valid_score, test_score = benchmark_regression(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model,
                    test=test,
                    hyper_parameters=hyper_parameters,
                    seed=seed)
        else:
            model.fit(train_dataset)
            train_score['user_defined'] = model.evaluate(
                train_dataset, metric, transformers)
            valid_score['user_defined'] = model.evaluate(
                valid_dataset, metric, transformers)
            if test:
                test_score['user_defined'] = model.evaluate(
                    test_dataset, metric, transformers)

        time_finish_fitting = time.time()

        with open(os.path.join(out_path, 'results.csv'), 'a') as f:
            writer = csv.writer(f)
            model_name = list(train_score.keys())[0]
            for i in train_score[model_name]:
                output_line = [
                    dataset,
                    str(split), mode, model_name, i, 'train',
                    train_score[model_name][i], 'valid',
                    valid_score[model_name][i]
                ]
                if test:
                    output_line.extend(['test', test_score[model_name][i]])
                output_line.extend([
                    'time_for_running',
                    time_finish_fitting - time_start_fitting
                ])
                writer.writerow(output_line)
Example #4
    def hyperparam_search(
            self,
            params_dict,
            train_dataset,
            valid_dataset,
            output_transformers,
            metric,
            direction=True,
            n_features=1024,
            n_tasks=1,
            max_iter=20,
            search_range=4,
            hp_invalid_list=[
                'seed', 'nb_epoch', 'penalty_type', 'dropouts',
                'bypass_dropouts', 'n_pair_feat', 'fit_transformers',
                'min_child_weight', 'max_delta_step', 'subsample',
                'colsample_bylevel', 'colsample_bytree', 'reg_alpha',
                'reg_lambda', 'scale_pos_weight', 'base_score'
            ],
            log_file='GPhypersearch.log'):
        """Perform hyperparams search using a gaussian process assumption

    params_dict include single-valued parameters being optimized,
    which should only contain int, float and list of int(float)

    parameters with names in hp_invalid_list will not be changed.

    For Molnet models, self.model_class is model name in string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
      dict including parameters and their initial values
      parameters not suitable for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
      dataset used for training
    valid_dataset: dc.data.Dataset struct
      dataset used for validation(optimization on valid scores)
    output_transformers: list of dc.trans.Transformer
      transformers for evaluation
    metric: list of dc.metrics.Metric
      metric used for evaluation
    direction: bool
      maximization(True) or minimization(False)
    n_features: int
      number of input features
    n_tasks: int
      number of tasks
    max_iter: int
      number of optimization trials
    search_range: int or float
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimized
    log_file: string
      name of the log file; hyperparameters and results for each trial are recorded there

    Returns
    -------
    hyper_parameters: dict
      params_dict with all optimized values
    valid_performance_opt: float
      best performance on valid dataset

    """

        assert len(metric) == 1, 'Only use one metric'
        hyper_parameters = params_dict
        hp_list = list(hyper_parameters.keys())
        for hp in hp_invalid_list:
            if hp in hp_list:
                hp_list.remove(hp)

        hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
        assert set(hp_list_class) <= set([list, int, float])
        # Float or int hyper parameters(ex. batch_size, learning_rate)
        hp_list_single = [
            hp_list[i] for i in range(len(hp_list))
            if not hp_list_class[i] is list
        ]
        # List of float or int hyper parameters(ex. layer_sizes)
        hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                            for i in range(len(hp_list))
                            if hp_list_class[i] is list]

        # Number of parameters
        n_param = len(hp_list_single)
        if len(hp_list_multiple) > 0:
            n_param = n_param + sum([hp[1] for hp in hp_list_multiple])
        # Range of optimization
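        # e.g. with search_range=4, an int value of 50 maps to ('int', [12, 200])
        # and a float value of 0.001 maps to ('cont', [0.00025, 0.004])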
        param_range = []
        for hp in hp_list_single:
            if hyper_parameters[hp].__class__ is int:
                param_range.append((('int'), [
                    hyper_parameters[hp] // search_range,
                    hyper_parameters[hp] * search_range
                ]))
            else:
                param_range.append((('cont'), [
                    hyper_parameters[hp] / search_range,
                    hyper_parameters[hp] * search_range
                ]))
        for hp in hp_list_multiple:
            if hyper_parameters[hp[0]][0].__class__ is int:
                param_range.extend([(('int'), [
                    hyper_parameters[hp[0]][i] // search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])
            else:
                param_range.extend([(('cont'), [
                    hyper_parameters[hp[0]][i] / search_range,
                    hyper_parameters[hp[0]][i] * search_range
                ]) for i in range(hp[1])])

        # Dummy names
        param_name = ['l' + format(i, '02d') for i in range(20)]
        param = dict(zip(param_name[:n_param], param_range))

        data_dir = os.environ['DEEPCHEM_DATA_DIR']
        log_file = os.path.join(data_dir, log_file)

        def f(l00=0,
              l01=0,
              l02=0,
              l03=0,
              l04=0,
              l05=0,
              l06=0,
              l07=0,
              l08=0,
              l09=0,
              l10=0,
              l11=0,
              l12=0,
              l13=0,
              l14=0,
              l15=0,
              l16=0,
              l17=0,
              l18=0,
              l19=0):
            """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
            args = locals()
            # Input hyper parameters
            i = 0
            for hp in hp_list_single:
                hyper_parameters[hp] = float(args[param_name[i]])
                if param_range[i][0] == 'int':
                    hyper_parameters[hp] = int(hyper_parameters[hp])
                i = i + 1
            for hp in hp_list_multiple:
                hyper_parameters[hp[0]] = [
                    float(args[param_name[j]]) for j in range(i, i + hp[1])
                ]
                if param_range[i][0] == 'int':
                    hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
                i = i + hp[1]

            logger.info(hyper_parameters)
            # Run benchmark
            with open(log_file, 'a') as f:
                # Record hyperparameters
                f.write(str(hyper_parameters))
                f.write('\n')
            if isinstance(self.model_class, str):
                try:
                    train_scores, valid_scores, _ = benchmark_classification(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                except AssertionError:
                    train_scores, valid_scores, _ = benchmark_regression(
                        train_dataset,
                        valid_dataset,
                        valid_dataset, ['task_placeholder'] * n_tasks,
                        output_transformers,
                        n_features,
                        metric,
                        self.model_class,
                        hyper_parameters=hyper_parameters)
                score = valid_scores[self.model_class][metric[0].name]
            else:
                model_dir = tempfile.mkdtemp()
                model = self.model_class(hyper_parameters, model_dir)
                model.fit(train_dataset, **hyper_parameters)
                model.save()
                evaluator = Evaluator(model, valid_dataset,
                                      output_transformers)
                multitask_scores = evaluator.compute_model_performance(metric)
                score = multitask_scores[metric[0].name]

            with open(log_file, 'a') as f:
                # Record performances
                f.write(str(score))
                f.write('\n')
            # GPGO maximize performance by default, set performance to its negative value for minimization
            if direction:
                return score
            else:
                return -score

        import pyGPGO
        from pyGPGO.covfunc import matern32
        from pyGPGO.acquisition import Acquisition
        from pyGPGO.surrogates.GaussianProcess import GaussianProcess
        from pyGPGO.GPGO import GPGO
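        # Bayesian optimization setup: a Matern 3/2 GP surrogate with an
        # expected-improvement acquisition, maximizing f over the `param` ranges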
        cov = matern32()
        gp = GaussianProcess(cov)
        acq = Acquisition(mode='ExpectedImprovement')
        gpgo = GPGO(gp, acq, f, param)
        logger.info("Max number of iteration: %i" % max_iter)
        gpgo.run(max_iter=max_iter)

        hp_opt, valid_performance_opt = gpgo.getResult()
        # Readout best hyper parameters
        i = 0
        for hp in hp_list_single:
            hyper_parameters[hp] = float(hp_opt[param_name[i]])
            if param_range[i][0] == 'int':
                hyper_parameters[hp] = int(hyper_parameters[hp])
            i = i + 1
        for hp in hp_list_multiple:
            hyper_parameters[hp[0]] = [
                float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
            ]
            if param_range[i][0] == 'int':
                hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
            i = i + hp[1]

        # Compare best model to default hyperparameters
        with open(log_file, 'a') as f:
            # Record hyperparameters
            f.write(str(params_dict))
            f.write('\n')
        if isinstance(self.model_class, str):
            try:
                train_scores, valid_scores, _ = benchmark_classification(
                    train_dataset,
                    valid_dataset,
                    valid_dataset, ['task_placeholder'] * n_tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    hyper_parameters=params_dict)
            except AssertionError:
                train_scores, valid_scores, _ = benchmark_regression(
                    train_dataset,
                    valid_dataset,
                    valid_dataset, ['task_placeholder'] * n_tasks,
                    output_transformers,
                    n_features,
                    metric,
                    self.model_class,
                    hyper_parameters=params_dict)
            score = valid_scores[self.model_class][metric[0].name]
            with open(log_file, 'a') as f:
                # Record performances
                f.write(str(score))
                f.write('\n')
            if not direction:
                score = -score
            if score > valid_performance_opt:
                # Default hyperparameters perform better; return them
                return params_dict, score

        # Otherwise return the optimized hyperparameters
        return hyper_parameters, valid_performance_opt
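
A usage sketch for hyperparam_search (illustrative assumptions: the method belongs to deepchem.hyper.GaussianProcessHyperparamOpt, as used in the run_benchmark example above; 'graphconv' has an entry in the preset hps dict; and DEEPCHEM_DATA_DIR must point somewhere writable for the log file).

# Hypothetical example; the loader call mirrors the loaders used elsewhere on this page.
import os
import numpy as np
import deepchem

os.environ.setdefault('DEEPCHEM_DATA_DIR', '/tmp')
tasks, (train, valid, test), transformers = deepchem.molnet.load_tox21(
    featurizer='GraphConv')
metric = [deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean)]

opt = deepchem.hyper.GaussianProcessHyperparamOpt('graphconv')
best_hps, best_valid_score = opt.hyperparam_search(
    deepchem.molnet.preset_hyper_parameters.hps['graphconv'],
    train, valid, transformers, metric,
    n_features=75, n_tasks=len(tasks), max_iter=20, search_range=4)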
Example #5
def run_benchmark(datasets,
                  model,
                  split=None,
                  metric=None,
                  featurizer=None,
                  n_features=0,
                  out_path='.',
                  hyper_parameters=None,
                  test=False,
                  seed=123):
  """
  Run benchmark tests on the designated datasets with a deepchem (or user-defined) model
  
  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl,
      clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba, 
      pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast 
  model: string or user-defined model structure
      choice of which model to use, deepchem provides implementation of
      logistic regression, random forest, multitask network,
      bypass multitask network, irv, graph convolution;
      for a user-defined model, it should implement fit and evaluate functions
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string,  optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
  n_features: int, optional(default=0)
      depending on featurizers, redefined when using deepchem featurizers,
      need to be specified for user-defined featurizers(if using deepchem models)
  out_path: string, optional(default='.')
      path of result file
  hyper_parameters: dict, optional (default=None)
      hyper parameters for designated model, None = use preset values
  test: boolean, optional(default=False)
      whether to evaluate on test set
  """
  for dataset in datasets:
    if dataset in [
        'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'sider', 'tox21',
        'toxcast'
    ]:
      mode = 'classification'
      if metric is None:
        metric = str('auc')
    elif dataset in [
        'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle', 'lipo',
        'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9', 'sampl'
    ]:
      mode = 'regression'
      if metric is None:
        metric = str('r2')
    else:
      raise ValueError('Dataset not supported')

    metric_all = {
        'auc': deepchem.metrics.Metric(deepchem.metrics.roc_auc_score, np.mean),
        'r2': deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score,
                                      np.mean)
    }

    if isinstance(metric, str):
      metric = [metric_all[metric]]

    if featurizer is None and isinstance(model, str):
      # Assigning featurizer if not user defined
      pair = (dataset, model)
      if pair in CheckFeaturizer:
        featurizer = CheckFeaturizer[pair][0]
        n_features = CheckFeaturizer[pair][1]
      else:
        continue

    if split not in [None] + CheckSplit[dataset]:
      continue

    loading_functions = {
        'bace_c': deepchem.molnet.load_bace_classification,
        'bace_r': deepchem.molnet.load_bace_regression,
        'bbbp': deepchem.molnet.load_bbbp,
        'chembl': deepchem.molnet.load_chembl,
        'clearance': deepchem.molnet.load_clearance,
        'clintox': deepchem.molnet.load_clintox,
        'delaney': deepchem.molnet.load_delaney,
        'hiv': deepchem.molnet.load_hiv,
        'hopv': deepchem.molnet.load_hopv,
        'kaggle': deepchem.molnet.load_kaggle,
        'lipo': deepchem.molnet.load_lipo,
        'muv': deepchem.molnet.load_muv,
        'nci': deepchem.molnet.load_nci,
        'pcba': deepchem.molnet.load_pcba,
        'pdbbind': deepchem.molnet.load_pdbbind_grid,
        'ppb': deepchem.molnet.load_ppb,
        'qm7': deepchem.molnet.load_qm7_from_mat,
        'qm7b': deepchem.molnet.load_qm7b_from_mat,
        'qm8': deepchem.molnet.load_qm8,
        'qm9': deepchem.molnet.load_qm9,
        'sampl': deepchem.molnet.load_sampl,
        'sider': deepchem.molnet.load_sider,
        'tox21': deepchem.molnet.load_tox21,
        'toxcast': deepchem.molnet.load_toxcast
    }

    print('-------------------------------------')
    print('Benchmark on dataset: %s' % dataset)
    print('-------------------------------------')
    # loading datasets
    if split is not None:
      print('Splitting function: %s' % split)
      tasks, all_dataset, transformers = loading_functions[dataset](
          featurizer=featurizer, split=split)
    else:
      tasks, all_dataset, transformers = loading_functions[dataset](
          featurizer=featurizer)

    train_dataset, valid_dataset, test_dataset = all_dataset

    time_start_fitting = time.time()
    train_score = {}
    valid_score = {}
    test_score = {}

    if isinstance(model, str):
      if mode == 'classification':
        train_score, valid_score, test_score = benchmark_classification(
            train_dataset,
            valid_dataset,
            test_dataset,
            tasks,
            transformers,
            n_features,
            metric,
            model,
            test=test,
            hyper_parameters=hyper_parameters,
            seed=seed)
      elif mode == 'regression':
        train_score, valid_score, test_score = benchmark_regression(
            train_dataset,
            valid_dataset,
            test_dataset,
            tasks,
            transformers,
            n_features,
            metric,
            model,
            test=test,
            hyper_parameters=hyper_parameters,
            seed=seed)
    else:
      model.fit(train_dataset)
      train_score['user_defined'] = model.evaluate(train_dataset, metric,
                                                   transformers)
      valid_score['user_defined'] = model.evaluate(valid_dataset, metric,
                                                   transformers)
      if test:
        test_score['user_defined'] = model.evaluate(test_dataset, metric,
                                                    transformers)

    time_finish_fitting = time.time()

    with open(os.path.join(out_path, 'results.csv'), 'a') as f:
      writer = csv.writer(f)
      for i in train_score:
        output_line = [
            dataset, str(split), mode, 'train', i,
            train_score[i][list(train_score[i].keys())[0]], 'valid', i,
            valid_score[i][list(valid_score[i].keys())[0]]
        ]
        if test:
          output_line.extend(
              ['test', i, test_score[i][list(test_score[i].keys())[0]]])
        output_line.extend(
            ['time_for_running', time_finish_fitting - time_start_fitting])
        writer.writerow(output_line)
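
A brief usage sketch for this variant (illustrative; it assumes the function is importable from the surrounding module): here metric may be passed as the string 'auc' or 'r2' and is resolved through the metric_all dict above.

# Hypothetical call on a regression dataset; with metric=None it would default to 'r2'.
run_benchmark(['delaney'], 'tf_regression', split='index',
              metric='r2', out_path='./results')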
Example #6
         test,
         tasks,
         transformers,
         n_features,
         metric,
         model,
         test=False,
         hyper_parameters=hyper_parameters,
         seed=seed)
 elif mode == 'regression':
     train_score, valid_score, test_score = benchmark_regression(
         train,
         valid,
         test,
         tasks,
         transformers,
         n_features,
         metric,
         model,
         test=False,
         hyper_parameters=hyper_parameters,
         seed=seed)
 with open(
         os.path.join(out_path, 'results_frac_train_curve.csv'),
         'a') as f:
     writer = csv.writer(f)
     model_name = list(train_score.keys())[0]
     for i in train_score[model_name]:
         output_line = [
             dataset,
             str(split), mode, model_name, i, 'train',
             train_score[model_name][i], 'valid',
Example #7
  def hyperparam_search(
      self,
      params_dict,
      train_dataset,
      valid_dataset,
      output_transformers,
      metric,
      direction=True,
      n_features=1024,
      n_tasks=1,
      max_iter=20,
      search_range=4,
      hp_invalid_list=[
          'seed', 'nb_epoch', 'penalty_type', 'dropouts', 'bypass_dropouts',
          'n_pair_feat', 'fit_transformers', 'min_child_weight',
          'max_delta_step', 'subsample', 'colsample_bylevel',
          'colsample_bytree', 'reg_alpha', 'reg_lambda', 'scale_pos_weight',
          'base_score'
      ],
      log_file='GPhypersearch.log'):
    """Perform hyperparams search using a gaussian process assumption

    params_dict include single-valued parameters being optimized,
    which should only contain int, float and list of int(float)

    parameters with names in hp_invalid_list will not be changed.

    For Molnet models, self.model_class is model name in string,
    params_dict = dc.molnet.preset_hyper_parameters.hps[self.model_class]

    Parameters
    ----------
    params_dict: dict
      dict including parameters and their initial values
      parameters not suitable for optimization can be added to hp_invalid_list
    train_dataset: dc.data.Dataset struct
      dataset used for training
    valid_dataset: dc.data.Dataset struct
      dataset used for validation(optimization on valid scores)
    output_transformers: list of dc.trans.Transformer
      transformers for evaluation
    metric: list of dc.metrics.Metric
      metric used for evaluation
    direction: bool
      maximization(True) or minimization(False)
    n_features: int
      number of input features
    n_tasks: int
      number of tasks
    max_iter: int
      number of optimization trials
    search_range: int or float
      optimization on [initial values / search_range,
                       initial values * search_range]
    hp_invalid_list: list
      names of parameters that should not be optimized
    log_file: string
      name of the log file; hyperparameters and results for each trial are recorded there

    Returns
    -------
    hyper_parameters: dict
      params_dict with all optimized values
    valid_performance_opt: float
      best performance on valid dataset

    """

    assert len(metric) == 1, 'Only use one metric'
    hyper_parameters = params_dict
    hp_list = list(hyper_parameters.keys())
    for hp in hp_invalid_list:
      if hp in hp_list:
        hp_list.remove(hp)

    hp_list_class = [hyper_parameters[hp].__class__ for hp in hp_list]
    assert set(hp_list_class) <= set([list, int, float])
    # Float or int hyper parameters(ex. batch_size, learning_rate)
    hp_list_single = [
        hp_list[i] for i in range(len(hp_list)) if not hp_list_class[i] is list
    ]
    # List of float or int hyper parameters(ex. layer_sizes)
    hp_list_multiple = [(hp_list[i], len(hyper_parameters[hp_list[i]]))
                        for i in range(len(hp_list))
                        if hp_list_class[i] is list]

    # Number of parameters
    n_param = len(hp_list_single)
    if len(hp_list_multiple) > 0:
      n_param = n_param + sum([hp[1] for hp in hp_list_multiple])
    # Range of optimization
    param_range = []
    for hp in hp_list_single:
      if hyper_parameters[hp].__class__ is int:
        param_range.append((('int'), [
            hyper_parameters[hp] // search_range,
            hyper_parameters[hp] * search_range
        ]))
      else:
        param_range.append((('cont'), [
            hyper_parameters[hp] / search_range,
            hyper_parameters[hp] * search_range
        ]))
    for hp in hp_list_multiple:
      if hyper_parameters[hp[0]][0].__class__ is int:
        param_range.extend([(('int'), [
            hyper_parameters[hp[0]][i] // search_range,
            hyper_parameters[hp[0]][i] * search_range
        ]) for i in range(hp[1])])
      else:
        param_range.extend([(('cont'), [
            hyper_parameters[hp[0]][i] / search_range,
            hyper_parameters[hp[0]][i] * search_range
        ]) for i in range(hp[1])])

    # Dummy names
    param_name = ['l' + format(i, '02d') for i in range(20)]
    param = dict(zip(param_name[:n_param], param_range))

    data_dir = os.environ['DEEPCHEM_DATA_DIR']
    log_file = os.path.join(data_dir, log_file)

    def f(l00=0,
          l01=0,
          l02=0,
          l03=0,
          l04=0,
          l05=0,
          l06=0,
          l07=0,
          l08=0,
          l09=0,
          l10=0,
          l11=0,
          l12=0,
          l13=0,
          l14=0,
          l15=0,
          l16=0,
          l17=0,
          l18=0,
          l19=0):
      """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
      args = locals()
      # Input hyper parameters
      i = 0
      for hp in hp_list_single:
        hyper_parameters[hp] = float(args[param_name[i]])
        if param_range[i][0] == 'int':
          hyper_parameters[hp] = int(hyper_parameters[hp])
        i = i + 1
      for hp in hp_list_multiple:
        hyper_parameters[hp[0]] = [
            float(args[param_name[j]]) for j in range(i, i + hp[1])
        ]
        if param_range[i][0] == 'int':
          hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
        i = i + hp[1]

      logger.info(hyper_parameters)
      # Run benchmark
      with open(log_file, 'a') as f:
        # Record hyperparameters
        f.write(str(hyper_parameters))
        f.write('\n')
      if isinstance(self.model_class, str):
        try:
          train_scores, valid_scores, _ = benchmark_classification(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        except AssertionError:
          train_scores, valid_scores, _ = benchmark_regression(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        score = valid_scores[self.model_class][metric[0].name]
      else:
        model_dir = tempfile.mkdtemp()
        model = self.model_class(hyper_parameters, model_dir)
        model.fit(train_dataset, **hyper_parameters)
        model.save()
        evaluator = Evaluator(model, valid_dataset, output_transformers)
        multitask_scores = evaluator.compute_model_performance(metric)
        score = multitask_scores[metric[0].name]

      with open(log_file, 'a') as f:
        # Record performances
        f.write(str(score))
        f.write('\n')
      # GPGO maximize performance by default, set performance to its negative value for minimization
      if direction:
        return score
      else:
        return -score

    import pyGPGO
    from pyGPGO.covfunc import matern32
    from pyGPGO.acquisition import Acquisition
    from pyGPGO.surrogates.GaussianProcess import GaussianProcess
    from pyGPGO.GPGO import GPGO
    cov = matern32()
    gp = GaussianProcess(cov)
    acq = Acquisition(mode='ExpectedImprovement')
    gpgo = GPGO(gp, acq, f, param)
    logger.info("Max number of iteration: %i" % max_iter)
    gpgo.run(max_iter=max_iter)

    hp_opt, valid_performance_opt = gpgo.getResult()
    # Readout best hyper parameters
    i = 0
    for hp in hp_list_single:
      hyper_parameters[hp] = float(hp_opt[param_name[i]])
      if param_range[i][0] == 'int':
        hyper_parameters[hp] = int(hyper_parameters[hp])
      i = i + 1
    for hp in hp_list_multiple:
      hyper_parameters[hp[0]] = [
          float(hp_opt[param_name[j]]) for j in range(i, i + hp[1])
      ]
      if param_range[i][0] == 'int':
        hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
      i = i + hp[1]

    # Compare best model to default hyperparameters
    with open(log_file, 'a') as f:
      # Record hyperparameters
      f.write(str(params_dict))
      f.write('\n')
    if isinstance(self.model_class, str):
      try:
        train_scores, valid_scores, _ = benchmark_classification(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=params_dict)
      except AssertionError:
        train_scores, valid_scores, _ = benchmark_regression(
            train_dataset,
            valid_dataset,
            valid_dataset, ['task_placeholder'] * n_tasks,
            output_transformers,
            n_features,
            metric,
            self.model_class,
            hyper_parameters=params_dict)
      score = valid_scores[self.model_class][metric[0].name]
      with open(log_file, 'a') as f:
        # Record performances
        f.write(str(score))
        f.write('\n')
      if not direction:
        score = -score
      if score > valid_performance_opt:
        # Default hyperparameters perform better; return them
        return params_dict, score

    # Otherwise return the optimized hyperparameters
    return hyper_parameters, valid_performance_opt
Example #8
    def f(l00=0,
          l01=0,
          l02=0,
          l03=0,
          l04=0,
          l05=0,
          l06=0,
          l07=0,
          l08=0,
          l09=0,
          l10=0,
          l11=0,
          l12=0,
          l13=0,
          l14=0,
          l15=0,
          l16=0,
          l17=0,
          l18=0,
          l19=0):
      """ Optimizing function
      Take in hyper parameter values and return valid set performances

      Parameters
      ----------
      l00~l19: int or float
        placeholders for hyperparameters being optimized,
        hyper_parameters dict is rebuilt based on input values of placeholders

      Returns:
      --------
      valid_scores: float
        valid set performances
      """
      args = locals()
      # Input hyper parameters
      i = 0
      for hp in hp_list_single:
        hyper_parameters[hp] = float(args[param_name[i]])
        if param_range[i][0] == 'int':
          hyper_parameters[hp] = int(hyper_parameters[hp])
        i = i + 1
      for hp in hp_list_multiple:
        hyper_parameters[hp[0]] = [
            float(args[param_name[j]]) for j in range(i, i + hp[1])
        ]
        if param_range[i][0] == 'int':
          hyper_parameters[hp[0]] = list(map(int, hyper_parameters[hp[0]]))
        i = i + hp[1]

      logger.info(hyper_parameters)
      # Run benchmark
      with open(log_file, 'a') as f:
        # Record hyperparameters
        f.write(str(hyper_parameters))
        f.write('\n')
      if isinstance(self.model_class, str):
        try:
          train_scores, valid_scores, _ = benchmark_classification(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        except AssertionError:
          train_scores, valid_scores, _ = benchmark_regression(
              train_dataset,
              valid_dataset,
              valid_dataset, ['task_placeholder'] * n_tasks,
              output_transformers,
              n_features,
              metric,
              self.model_class,
              hyper_parameters=hyper_parameters)
        score = valid_scores[self.model_class][metric[0].name]
      else:
        model_dir = tempfile.mkdtemp()
        model = self.model_class(hyper_parameters, model_dir)
        model.fit(train_dataset, **hyper_parameters)
        model.save()
        evaluator = Evaluator(model, valid_dataset, output_transformers)
        multitask_scores = evaluator.compute_model_performance(metric)
        score = multitask_scores[metric[0].name]

      with open(log_file, 'a') as f:
        # Record performances
        f.write(str(score))
        f.write('\n')
      # GPGO maximize performance by default, set performance to its negative value for minimization
      if direction:
        return score
      else:
        return -score
Example #9
def run_benchmark(datasets,
                  model,
                  split=None,
                  metric=None,
                  featurizer=None,
                  out_path='.',
                  test=False):
    """
  Run benchmark tests on the designated datasets with a deepchem (or user-defined) model
  
  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: tox21, muv, sider, 
      toxcast, pcba, delaney, kaggle, nci, clintox, hiv, pdbbind, chembl,
      qm7, qm7b, qm9, sampl
  model: string or user-defined model structure
      choice of which model to use, deepchem provides implementation of
      logistic regression, random forest, multitask network,
      bypass multitask network, irv, graph convolution;
      for a user-defined model, it should implement fit and evaluate functions
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string,  optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
  out_path: string, optional(default='.')
      path of result file
  test: boolean, optional(default=False)
      whether to evaluate on test set
  """
    for dataset in datasets:
        if dataset in [
                'muv', 'pcba', 'tox21', 'sider', 'toxcast', 'clintox', 'hiv'
        ]:
            mode = 'classification'
            if metric is None:
                metric = [dc.metrics.Metric(dc.metrics.roc_auc_score, np.mean)]
        elif dataset in [
                'kaggle', 'delaney', 'nci', 'pdbbind', 'chembl', 'qm7', 'qm7b',
                'qm9', 'sampl'
        ]:
            mode = 'regression'
            if metric is None:
                metric = [
                    dc.metrics.Metric(dc.metrics.pearson_r2_score, np.mean)
                ]
        else:
            raise ValueError('Dataset not supported')

        if featurizer is None:
            # Assigning featurizer if not user defined
            if model in ['graphconv', 'graphconvreg']:
                featurizer = 'GraphConv'
                n_features = 75
            elif model in [
                    'tf', 'tf_robust', 'logreg', 'rf', 'irv', 'tf_regression',
                    'rf_regression'
            ]:
                featurizer = 'ECFP'
                n_features = 1024
            else:
                raise ValueError(
                    'featurization should be specified for user-defined models'
                )
            # Some exceptions in datasets
            if dataset in ['kaggle']:
                featurizer = None  # kaggle dataset is already featurized
                if isinstance(model, str) and model not in [
                        'tf_regression', 'rf_regression'
                ]:
                    return
                if split in ['scaffold', 'butina', 'random']:
                    return
            elif dataset in ['qm7', 'qm7b', 'qm9']:
                featurizer = None  # qm* datasets are already featurized
                if isinstance(model, str) and model not in ['tf_regression']:
                    return
                elif model in ['tf_regression']:
                    model = 'tf_regression_ft'
                if split in ['scaffold', 'butina']:
                    return
            elif dataset in ['pdbbind']:
                featurizer = 'grid'  # pdbbind accepts grid featurizer
                if isinstance(model, str) and model not in [
                        'tf_regression', 'rf_regression'
                ]:
                    return
                if split in ['scaffold', 'butina']:
                    return

        if split not in [
                None, 'index', 'random', 'scaffold', 'butina', 'stratified'
        ]:
            raise ValueError('Splitter function not supported')

        loading_functions = {
            'tox21': dc.molnet.load_tox21,
            'muv': dc.molnet.load_muv,
            'pcba': dc.molnet.load_pcba,
            'nci': dc.molnet.load_nci,
            'sider': dc.molnet.load_sider,
            'toxcast': dc.molnet.load_toxcast,
            'kaggle': dc.molnet.load_kaggle,
            'delaney': dc.molnet.load_delaney,
            'pdbbind': dc.molnet.load_pdbbind_grid,
            'chembl': dc.molnet.load_chembl,
            'qm7': dc.molnet.load_qm7_from_mat,
            'qm7b': dc.molnet.load_qm7b_from_mat,
            'qm9': dc.molnet.load_qm9,
            'sampl': dc.molnet.load_sampl,
            'clintox': dc.molnet.load_clintox,
            'hiv': dc.molnet.load_hiv
        }

        print('-------------------------------------')
        print('Benchmark on dataset: %s' % dataset)
        print('-------------------------------------')
        # loading datasets
        if split is not None:
            print('Splitting function: %s' % split)
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer, split=split)
        else:
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer)

        train_dataset, valid_dataset, test_dataset = all_dataset
        if dataset in ['kaggle', 'pdbbind']:
            n_features = train_dataset.get_data_shape()[0]
        elif dataset in ['qm7', 'qm7b', 'qm9']:
            n_features = list(train_dataset.get_data_shape())

        time_start_fitting = time.time()
        train_score = {}
        valid_score = {}
        test_score = {}

        if isinstance(model, str):
            if mode == 'classification':
                train_score, valid_score, test_score = benchmark_classification(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model=model,
                    test=test)
            elif mode == 'regression':
                train_score, valid_score, test_score = benchmark_regression(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model=model,
                    test=test)
        else:
            model.fit(train_dataset)
            train_score['user_defined'] = model.evaluate(
                train_dataset, metric, transformers)
            valid_score['user_defined'] = model.evaluate(
                valid_dataset, metric, transformers)
            if test:
                test_score['user_defined'] = model.evaluate(
                    test_dataset, metric, transformers)

        time_finish_fitting = time.time()

        with open(os.path.join(out_path, 'results.csv'), 'a') as f:
            writer = csv.writer(f)
            for i in train_score:
                output_line = [
                    dataset,
                    str(split), mode, 'train', i,
                    train_score[i][list(train_score[i].keys())[0]], 'valid', i,
                    valid_score[i][list(valid_score[i].keys())[0]]
                ]
                if test:
                    output_line.extend([
                        'test', i,
                        test_score[i][list(test_score[i].keys())[0]]
                    ])
                output_line.extend([
                    'time_for_running',
                    time_finish_fitting - time_start_fitting
                ])
                writer.writerow(output_line)
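
        # Each row appended to results.csv above has the layout:
        #   dataset, split, mode, 'train', <model>, <train value>,
        #   'valid', <model>, <valid value>,
        #   ['test', <model>, <test value>]   (only when test=True),
        #   'time_for_running', <elapsed seconds>
        # where <model> is a key of the score dicts and each value is the first
        # metric recorded for that model.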
Example #10
0
def run_benchmark(ckpt,
                  arg,
                  datasets,
                  model,
                  split=None,
                  metric=None,
                  direction=True,
                  featurizer=None,
                  n_features=0,
                  out_path='.',
                  hyper_parameters=None,
                  hyper_param_search=False,
                  max_iter=20,
                  search_range=2,
                  test=False,
                  reload=True,
                  seed=123):
    """
  Run benchmark test on designated datasets with deepchem(or user-defined) model

  Parameters
  ----------
  datasets: list of string
      choice of which datasets to use, should be: bace_c, bace_r, bbbp, chembl,
      clearance, clintox, delaney, hiv, hopv, kaggle, lipo, muv, nci, pcba,
      pdbbind, ppb, qm7, qm7b, qm8, qm9, sampl, sider, tox21, toxcast
  model: string or user-defined model stucture
      choice of which model to use, deepchem provides implementation of
      logistic regression, random forest, multitask network,
      bypass multitask network, irv, graph convolution;
      for user define model, it should include function: fit, evaluate
  split: string,  optional (default=None)
      choice of splitter function, None = using the default splitter
  metric: string, optional (default=None)
      choice of evaluation metrics, None = using the default metrics(AUC & R2)
  direction: bool, optional(default=True)
      Optimization direction when doing hyperparameter search
      Maximization(True) or minimization(False)
  featurizer: string or dc.feat.Featurizer,  optional (default=None)
      choice of featurization, None = using the default corresponding to model
      (string only applicable to deepchem models)
  n_features: int, optional(default=0)
      depending on featurizers, redefined when using deepchem featurizers,
      need to be specified for user-defined featurizers(if using deepchem models)
  out_path: string, optional(default='.')
      path of result file
  hyper_parameters: dict, optional (default=None)
      hyper parameters for designated model, None = use preset values
  hyper_param_search: bool, optional(default=False)
      whether to perform hyper parameter search, using gaussian process by default
  max_iter: int, optional(default=20)
      number of optimization trials
  search_range: int(float), optional(default=4)
      optimization on [initial values / search_range,
                       initial values * search_range]
  test: boolean, optional(default=False)
      whether to evaluate on test set
  reload: boolean, optional(default=True)
      whether to save and reload featurized datasets
  """

    benchmark_data = {}
    for dataset in datasets:
        if dataset in [
                'bace_c', 'bbbp', 'clintox', 'hiv', 'muv', 'pcba', 'pcba_146',
                'pcba_2475', 'sider', 'tox21', 'toxcast'
        ]:
            mode = 'classification'
            if metric is None:
                metric = [
                    deepchem.metrics.Metric(deepchem.metrics.roc_auc_score,
                                            np.mean),
                ]
        elif dataset in [
                'bace_r', 'chembl', 'clearance', 'delaney', 'hopv', 'kaggle',
                'lipo', 'nci', 'pdbbind', 'ppb', 'qm7', 'qm7b', 'qm8', 'qm9',
                'sampl'
        ]:
            mode = 'regression'
            if metric is None:
                metric = [
                    deepchem.metrics.Metric(deepchem.metrics.pearson_r2_score,
                                            np.mean)
                ]
        else:
            raise ValueError('Dataset not supported')

        if featurizer is None and isinstance(model, str):
            # Assigning featurizer if not user defined
            pair = (dataset, model)
            if pair in CheckFeaturizer:
                featurizer = CheckFeaturizer[pair][0]
                n_features = CheckFeaturizer[pair][1]
            else:
                continue

        if split not in [None] + CheckSplit[dataset]:
            continue

        loading_functions = {
            'bace_c': deepchem.molnet.load_bace_classification,
            'bace_r': deepchem.molnet.load_bace_regression,
            'bbbp': deepchem.molnet.load_bbbp,
            'chembl': deepchem.molnet.load_chembl,
            'clearance': deepchem.molnet.load_clearance,
            'clintox': deepchem.molnet.load_clintox,
            'delaney': deepchem.molnet.load_delaney,
            'hiv': deepchem.molnet.load_hiv,
            'hopv': deepchem.molnet.load_hopv,
            'kaggle': deepchem.molnet.load_kaggle,
            'lipo': deepchem.molnet.load_lipo,
            'muv': deepchem.molnet.load_muv,
            'nci': deepchem.molnet.load_nci,
            'pcba': deepchem.molnet.load_pcba,
            'pcba_146': deepchem.molnet.load_pcba_146,
            'pcba_2475': deepchem.molnet.load_pcba_2475,
            'pdbbind': deepchem.molnet.load_pdbbind_grid,
            'ppb': deepchem.molnet.load_ppb,
            'qm7': deepchem.molnet.load_qm7_from_mat,
            'qm7b': deepchem.molnet.load_qm7b_from_mat,
            'qm8': deepchem.molnet.load_qm8,
            'qm9': deepchem.molnet.load_qm9,
            'sampl': deepchem.molnet.load_sampl,
            'sider': deepchem.molnet.load_sider,
            'tox21': deepchem.molnet.load_tox21,
            'toxcast': deepchem.molnet.load_toxcast
        }

        print('-------------------------------------')
        print('Benchmark on dataset: %s' % dataset)
        print('-------------------------------------')
        # loading datasets
        if split is not None:
            print('Splitting function: %s' % split)
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer, split=split, reload=reload)
        else:
            tasks, all_dataset, transformers = loading_functions[dataset](
                featurizer=featurizer, reload=reload)

        train_dataset, valid_dataset, test_dataset = all_dataset

        time_start_fitting = time.time()
        train_score = {}
        valid_score = {}
        test_score = {}

        if hyper_param_search:
            if hyper_parameters is None:
                hyper_parameters = hps[model]
            search_mode = deepchem.hyper.GaussianProcessHyperparamOpt(model)
            hyper_param_opt, _ = search_mode.hyperparam_search(
                hyper_parameters,
                train_dataset,
                valid_dataset,
                transformers,
                metric,
                direction=direction,
                n_features=n_features,
                n_tasks=len(tasks),
                max_iter=max_iter,
                search_range=search_range)
            hyper_parameters = hyper_param_opt
        if isinstance(model, str):
            if mode == 'classification':
                train_score, valid_score, test_score = benchmark_classification(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model,
                    test=test,
                    hyper_parameters=hyper_parameters,
                    seed=seed)
            elif mode == 'regression':
                train_score, valid_score, test_score = benchmark_regression(
                    train_dataset,
                    valid_dataset,
                    test_dataset,
                    tasks,
                    transformers,
                    n_features,
                    metric,
                    model,
                    test=test,
                    hyper_parameters=hyper_parameters,
                    seed=seed)
        else:
            model.fit(train_dataset)
            train_score['user_defined'] = model.evaluate(
                train_dataset, metric, transformers)
            valid_score['user_defined'] = model.evaluate(
                valid_dataset, metric, transformers)
            if test:
                test_score['user_defined'] = model.evaluate(
                    test_dataset, metric, transformers)

        time_finish_fitting = time.time()

        with open(os.path.join(out_path, 'results.csv'), 'a') as f:
            writer = csv.writer(f)
            model_name = list(train_score.keys())[0]
            for i in train_score[model_name]:
                output_line = [
                    str(dataset), '_',
                    str(model), '_',
                    str(featurizer), '\n'
                ]
                output_line.extend([
                    dataset,
                    str(split), mode, model_name, i, 'train',
                    train_score[model_name][i], 'valid',
                    valid_score[model_name][i]
                ])
                if test:
                    output_line.extend(['test', test_score[model_name][i]])
                output_line.extend([
                    'time_for_running',
                    time_finish_fitting - time_start_fitting
                ])
                writer.writerow(output_line)
        if hyper_param_search:
            with open(os.path.join(out_path, dataset + model + '.pkl'),
                      'wb') as f:
                pickle.dump(hyper_parameters, f)

        # Logging experiment results

        benchmark_data = {}
        featurizer_name = str(featurizer.__class__.__qualname__)
        print('#########################')
        print(featurizer_name)
        print('#########################')
        if 'Comet' in featurizer_name:
            print('in comet')
            ckpt_model = '{}_{}'.format('_'.join(ckpt.split('_')[:-2]),
                                        n_features)
        elif 'RandFeat' in featurizer_name:
            print('in rand')
            ckpt_model = '{}_{}'.format('Rand', n_features)
            ckpt = 'rand'
        else:
            print('not comet')
            # default label; hyper parameters are logged only for fingerprints
            ckpt_model = '{}_{}'.format(ckpt, n_features)
            if ckpt == 'fingerprint':
                benchmark_data.update(hyper_parameters or {})

        # 'i' still holds the last metric key from the results loop above
        benchmark_data.update({
            'ckpt': '{}_{}'.format(ckpt_model, model),
            'task': dataset,
            'model': model_name,
            'train_score': train_score[model_name][i],
            'val_score': valid_score[model_name][i],
            'test_score': test_score[model_name][i] if test else None
        })

        benchmark_data.update(arg)
        result_filename = '{}.json'.format(ckpt_model)
        exp_name = '{}_{}_{}'.format(ckpt, dataset, model)

        from os.path import join, isfile
        list_files = [
            f for f in os.listdir(out_path) if isfile(join(out_path, f))
        ]

        if result_filename in list_files:
            with open(os.path.join(out_path, result_filename),
                      'r+') as outfile:
                temp = json.load(outfile)
                temp.update({exp_name: benchmark_data})
                outfile.seek(0)
                json.dump(temp, outfile)
                outfile.truncate()
        else:
            with open(os.path.join(out_path, result_filename),
                      'w+') as outfile:
                temp = {exp_name: benchmark_data}
                json.dump(temp, outfile)
        print('Result Saved at {}'.format(
            os.path.join(out_path, result_filename)))
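
# A minimal usage sketch for the run_benchmark variant above. The values are
# hypothetical: they assume the MoleculeNet 'tox21' loader, preset 'tf'
# hyperparameters, and CheckFeaturizer/CheckSplit entries for this pair are
# available in the surrounding module; ckpt and arg only label the logged results.
if __name__ == '__main__':
    run_benchmark(
        ckpt='fingerprint',       # result JSON is written to '<ckpt>_<n_features>.json'
        arg={'run_id': 'demo'},   # hypothetical metadata merged into the JSON log
        datasets=['tox21'],       # classification benchmark, ROC-AUC by default
        model='tf',               # deepchem multitask network
        split='random',
        out_path='.',
        hyper_param_search=False,
        test=True,
        seed=123)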