def predict(name, sp, arrsource, arrfeats, nsp, clf_scaler_feats=None,
            clf_factory=None, clffact_feats=None, clf_type='svm', norm=True,
            nfeats=100, balance_train=False):
    """
    Classify arrsource, first training a classifier on arrfeats unless an
    already-fitted one is supplied through clf_scaler_feats.

    - name, sp, nsp: not used for computation here; stored on the result as
      name, species and nsp respectively.
    - arrsource: array of data to classify, matching the training array
    - arrfeats: labeled training examples array, from
      ppi.feature_array.arrfeats, also stored in res.cvtest result as
      result.exs.arrfeats.
    - clf_scaler_feats: optional (clf, scaler, feats) triple.  When truthy,
      no balancing, feature selection or fitting is done and these three
      values are used as-is.
    - clf_factory: zero-argument callable returning the classifier to fit.
    - clffact_feats: zero-argument callable whose result is handed to
      feature_selection as its third argument; None is passed when this is
      falsy.
    - clf_type: key into clf_factories.  Only consulted when clf_factory is
      None; in that case BOTH clf_factory and clffact_feats are replaced by
      the registered pair.
    - norm: forwarded to ml.fit_clf.
    - nfeats: forwarded to feature_selection.
    - balance_train: if true, arrfeats is passed through fe.balance_train
      before feature selection and fitting.  Also recorded on the result.

    Returns a Struct with fields ppis, name, species, ppi_params (str of the
    classifier), feats, nsp, arrfeats and balance_train.  When training
    happens here, the stored arrfeats is the balanced / column-reduced array
    that was actually used for fitting.

    Raises TypeError if training is required but no classifier factory can
    be resolved from clf_factory / clf_type.
    """
    if clf_scaler_feats:
        clf, scaler, feats = clf_scaler_feats
    else:
        if clf_type in clf_factories and clf_factory is None:
            clf_factory, clffact_feats = clf_factories[clf_type]
        if clf_factory is None:
            # Fail before the balancing / feature-selection work rather than
            # with "'NoneType' object is not callable" after it.
            raise TypeError(
                "predict: no classifier factory available; clf_factory was "
                "not given and clf_type %r is not in clf_factories"
                % (clf_type,))
        if balance_train:
            arrfeats = fe.balance_train(arrfeats)
        selector_clf = clffact_feats() if clffact_feats else None
        feats = feature_selection(arrfeats, nfeats, selector_clf)
        arrfeats = fe.keep_cols(arrfeats, feats)
        clf = clf_factory()
        scaler = ml.fit_clf(arrfeats, clf, norm=norm)
        # Same text as the old `print "Classifier:", clf` statement, but also
        # parses under Python 3.
        print("Classifier: %s" % (clf,))
    # Restrict the data to classify to the same columns the classifier uses.
    arrsource = fe.keep_cols(arrsource, feats)
    ppis = ml.classify(clf, arrsource, scaler=scaler)
    pres = Struct(ppis=ppis, name=name, species=sp, ppi_params=str(clf),
                  feats=feats, nsp=nsp, arrfeats=arrfeats,
                  balance_train=balance_train)
    return pres
def fold_test(arrfeats, kfold, k, clf_factory, clffact_feats, nfeats, norm,
              balance_train):
    """
    Train on one cross-validation split of arrfeats and classify its
    held-out part.

    - arrfeats: array split into train/test parts by
      fe.arr_kfold(arrfeats, kfold, k).
    - kfold, k: forwarded to fe.arr_kfold; the classifier is printed only
      when k == 0.
    - clf_factory: zero-argument callable returning the classifier to fit.
    - clffact_feats: zero-argument callable whose result is handed to
      feature_selection as its third argument; None is passed when this is
      falsy.  Only used when nfeats is truthy.
    - nfeats: when truthy, feature selection is run on the training part
      and both parts are reduced to the selected columns; otherwise all
      columns are kept and the returned feats is None.
    - norm: forwarded to ml.fit_clf.
    - balance_train: if true, the training part is passed through
      fe.balance_train before feature selection and fitting.

    Returns the tuple (ppis, clf, scaler, feats).  ppis is the unsorted
    output of ml.classify on the test part, or [] when
    ml.exist_pos_neg(arrtrain) is false.
    """
    arrtrain, arrtest = fe.arr_kfold(arrfeats, kfold, k)
    if balance_train:
        arrtrain = fe.balance_train(arrtrain)
    if nfeats:
        # Tolerate a missing selector factory instead of crashing on
        # None().
        clf_feats = clffact_feats() if clffact_feats else None
        feats = feature_selection(arrtrain, nfeats, clf_feats)
        arrtrain, arrtest = [fe.keep_cols(a, feats)
                             for a in (arrtrain, arrtest)]
    else:
        feats = None
    clf = clf_factory()
    if k == 0:
        # Same text as the old `print "Classifier:", clf` statement, but also
        # parses under Python 3.
        print("Classifier: %s" % (clf,))
    # NOTE(review): fitting happens before the exist_pos_neg check below, so
    # a training split failing that check is still passed to ml.fit_clf --
    # confirm fit_clf copes with that; order kept as in the original.
    scaler = ml.fit_clf(arrtrain, clf, norm=norm)
    if ml.exist_pos_neg(arrtrain):
        ppis = ml.classify(clf, arrtest, scaler=scaler, do_sort=False)
    else:
        ppis = []
    return ppis, clf, scaler, feats