Esempio n. 1
0
def predict(name, sp, arrsource, arrfeats, nsp, clf_scaler_feats=None,
        clf_factory=None, clffact_feats=None, clf_type='svm', norm=True, nfeats=100, balance_train=False):
    """
    - arrfeats: labeled training examples array, from
      ppi.feature_array.arrfeats, also stored in res.cvtest result as
      result.exs.arrfeats.
    - arrsource: array of data to classify, matching the training array
    """
    if clf_scaler_feats:
        clf, scaler, feats = clf_scaler_feats
    else:
        if balance_train:
            arrfeats = fe.balance_train(arrfeats)
        if clf_type in clf_factories and clf_factory is None:
            clf_factory, clffact_feats = clf_factories[clf_type]
        feats = feature_selection(arrfeats, nfeats, clffact_feats() if clffact_feats
                else None)
        arrfeats = fe.keep_cols(arrfeats, feats)
        clf = clf_factory()
        scaler = ml.fit_clf(arrfeats, clf, norm=norm)
    print "Classifier:", clf
    arrsource = fe.keep_cols(arrsource, feats)
    ppis = ml.classify(clf, arrsource, scaler=scaler)
    pres = Struct(ppis=ppis,name=name, species=sp, ppi_params=str(clf),
            feats=feats, nsp=nsp, arrfeats=arrfeats,
            balance_train=balance_train)
    return pres
Esempio n. 2
0
def fold_test(arrfeats, kfold, k, clf_factory, clffact_feats, nfeats, norm,
        balance_train):
    arrtrain, arrtest = fe.arr_kfold(arrfeats, kfold, k)
    if balance_train:
        arrtrain = fe.balance_train(arrtrain)
    if nfeats:
        clf_feats = clffact_feats()
        feats = feature_selection(arrtrain, nfeats, clf_feats)
        arrtrain,arrtest = [fe.keep_cols(a,feats) for a in arrtrain,arrtest]
    else:
        feats = None
    clf = clf_factory()
    if k==0: print "Classifier:", clf
    scaler = ml.fit_clf(arrtrain, clf, norm=norm)
    if ml.exist_pos_neg(arrtrain):
        ppis = ml.classify(clf, arrtest, scaler=scaler, do_sort=False)
    else:
        ppis = []
    return ppis,clf,scaler,feats