Example #1
0
def forest_config(config, n_jobs=-1, settings=None):
    """
     forest_config: obj -> obj

    ---------------------------------------------------------------

     Sets the defaults for a custom forest configuration object
     from configobj.

     The defaults are only set if the setting does not exist (thus,
     it is implied that the user needs a default). The one exception
     is 'oob_score', which is always set to [True].

     Required Parameters
     -------------------
     * config: obj
             The configobj instance object to scan if defaults
             have been customized.

     Optional Parameters
     -------------------
     * n_jobs: int or list of int
             Value(s) stored under 'n_jobs' when the setting is
             missing. A single value is wrapped into a one-element
             list; a list or tuple is copied as-is, so both -1 and
             [-1] result in [-1].
     * settings: None
             The global settings to use if it exists, otherwise
             use the defaults.

     Returns
     -------
     * config: obj
             The configobj instance after defaults have been set
             if applicable.

    ---------------------------------------------------------------
    """
    config = _global_config(config, settings, RandomForestClassifier._get_param_names())

    # Every entry is stored as a list of candidate values. The previous
    # signature defaulted to the list [-1] and then wrapped it again, which
    # stored [[-1]]; normalise so scalars and sequences both end up flat.
    if isinstance(n_jobs, (list, tuple)):
        n_jobs_values = list(n_jobs)  # copy: never share the caller's list
    else:
        n_jobs_values = [n_jobs]

    if 'n_estimators' not in config:
        config['n_estimators'] = [10, 64, 96, 128]
    if 'criterion' not in config:
        config['criterion'] = ['entropy']
    # NOTE(review): the original conditional assigned [True] in both branches,
    # so a user-supplied 'oob_score' has always been overridden. That
    # behaviour is kept here - confirm it is intentional.
    config['oob_score'] = [True]
    if 'class_weight' not in config:
        config['class_weight'] = ['balanced']
    if 'n_jobs' not in config:
        config['n_jobs'] = n_jobs_values
    return config
Example #2
0
except ImportError:
    import draw
    import feature as fea
    import metrics as mt



# Directory containing this module, and the 'nr_raa_data.db' file that is
# expected to sit next to it.
BASE_PATH = Path(__file__).parent
RAA_DB = BASE_PATH.joinpath('nr_raa_data.db')

# The twenty single-letter codes that check_aa() compares a cluster string
# against (presumably the natural amino acids), in alphabetical order.
NAA = list('ACDEFGHIKLMNPQRSTVWY')


# Classifier classes keyed by their short name.
clf_dic = {
    'svm': SVC,
    'knn': KNeighborsClassifier,
    'rf': RandomForestClassifier,
}

# Parameter names reported by each classifier class, individually and keyed
# by the same short names as clf_dic.
svm_params = SVC._get_param_names()
knn_params = KNeighborsClassifier._get_param_names()
rf_params = RandomForestClassifier._get_param_names()
clf_param_names = {
    'svm': svm_params,
    'knn': knn_params,
    'rf': rf_params,
}


def check_aa(aa):
    """Validate a dash-separated amino acid cluster string.

    The string is stripped of surrounding whitespace and upper-cased before
    any check is made. It is accepted only if it does not start or end with
    "-", contains at least one "-", and - once the dashes are removed -
    consists of exactly the letters in NAA, each appearing once.

    Parameters
    ----------
    aa : str
        Cluster description, letters grouped by "-".

    Returns
    -------
    None

    Raises
    ------
    ValueError
        If the string is empty, has a leading/trailing dash, has no dash at
        all, or its letters do not match NAA.
    """
    # Normalise first so padded input such as " -AC " is judged on its
    # real content rather than on the surrounding blanks.
    aa = aa.strip().upper()
    # startswith/endswith are safe on an empty string, unlike aa[0]/aa[-1].
    if aa.startswith("-") or aa.endswith("-"):
        raise ValueError("amino acid cluster is wrong!")
    if "-" not in aa:
        raise ValueError("need an amino acid cluster!")
    # list.sort() sorts in place and returns None, so comparing its results
    # always compared None with None. sorted() returns the sorted lists and
    # leaves the module-level NAA untouched.
    if sorted(aa.replace("-", "")) != sorted(NAA):
        raise ValueError("amino acid cluster is wrong!")
    
# TODO - query optimization