def forest_config(config, n_jobs=(-1,), settings=None):
    """
    forest_config: obj -> obj

    ---------------------------------------------------------------

    Sets the defaults for a custom forest configuration object
    from configobj.

    The defaults are only set if the setting does not exist (thus,
    it is implied that the user needs a default). The one exception
    is 'oob_score', which is always set to [True].

    Required Parameters
    -------------------

    * config: obj
        The configobj instance object to scan if defaults
        have been customized.

    Optional Parameters
    -------------------

    * n_jobs: (-1,)
        Default value(s) stored under 'n_jobs' when the
        configuration does not define it. A list or tuple is
        stored as a flat list of its items; any other value is
        wrapped in a one-element list.

    * settings: None
        The global settings to use if it exists, otherwise
        use the defaults.

    Returns
    -------

    * config: obj
        The configobj instance after defaults have been set
        if applicable.

    ---------------------------------------------------------------
    """
    config = _global_config(
        config, settings, RandomForestClassifier._get_param_names()
    )

    # Every other setting below is a flat list of values. Wrapping an
    # already list-valued n_jobs again produced [[-1]], so sequences are
    # copied as-is and only scalars get wrapped.
    if isinstance(n_jobs, (list, tuple)):
        n_jobs_values = list(n_jobs)
    else:
        n_jobs_values = [n_jobs]

    if 'n_estimators' not in config:
        config['n_estimators'] = [10, 64, 96, 128]
    if 'criterion' not in config:
        config['criterion'] = ['entropy']

    # NOTE(review): the original conditional yielded [True] in both
    # branches, so any user-supplied 'oob_score' is overwritten. Kept
    # as-is; confirm that this override is intentional.
    config['oob_score'] = [True]

    if 'class_weight' not in config:
        config['class_weight'] = ['balanced']
    if 'n_jobs' not in config:
        config['n_jobs'] = n_jobs_values

    return config
except ImportError: import draw import feature as fea import metrics as mt BASE_PATH = Path(__file__).parent RAA_DB = BASE_PATH / 'nr_raa_data.db' NAA = ['A', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'K', 'L', 'M', 'N', 'P', 'Q', 'R', 'S', 'T', 'V', 'W', 'Y'] svm_params = SVC._get_param_names() knn_params = KNeighborsClassifier._get_param_names() rf_params = RandomForestClassifier._get_param_names() clf_param_names = {'svm': svm_params, 'knn': knn_params, 'rf': rf_params} clf_dic = {'svm': SVC, 'knn': KNeighborsClassifier, 'rf': RandomForestClassifier} def check_aa(aa): if aa[0] == "-" or aa[-1] == "-": raise ValueError("amino acid cluster is wrong!") if "-" not in aa: raise ValueError("need an amino acid cluster!") aa = aa.strip().upper() cls_ls = list(aa.replace("-", "")).sort() if NAA.sort() != cls_ls: raise ValueError("amino acid cluster is wrong!") # TODO - query optimization