def _load_split(project_name, split):
    """Load one CSV split of a project and separate features from labels.

    The file is read from ``dataset/<project_name>/classes/<split>.csv``
    (relative to the current working directory) using ``;`` as separator.

    Args:
        project_name: Name of the project directory below ``dataset``.
        split: Base name of the CSV file, e.g. ``"training"`` or ``"testing"``.

    Returns:
        A ``(features, labels)`` pair: the frame without the ``Bugged``
        column, and the ``Bugged`` column mapped to 1 (truthy) / 0 (falsy).
    """
    # Build the path with os.path.join instead of hard-coded backslashes so
    # the same code resolves the file on both Windows and POSIX systems.
    csv_path = os.path.realpath(
        os.path.join("dataset", str(project_name), "classes", split + ".csv"))
    frame = pd.read_csv(csv_path, sep=';')
    labels = frame['Bugged'].apply(lambda flag: 1 if flag else 0)
    features = frame.drop('Bugged', axis=1)
    return features, labels


def main(project_name, ind=None):
    """Fit a HungaBungaClassifier on a project's data and print its test score.

    Reads the project's training and testing CSV files, fits the classifier
    on the training split (scored with ``pr_auc_score`` on predicted
    probabilities), and prints a JSON object with the class name of the
    selected model (``"model"``) and its score on the testing split
    formatted to three decimals (``"score"``).

    Args:
        project_name: Name of the project directory below ``dataset``.
        ind: Forwarded unchanged to ``HungaBungaClassifier(ind=...)``.
    """
    training_X, training_y = _load_split(project_name, "training")
    testing_X, testing_y = _load_split(project_name, "testing")

    clf = HungaBungaClassifier(
        brain=True,
        ind=ind,
        scoring=metrics.make_scorer(pr_auc_score, needs_proba=True))
    clf.fit(training_X, training_y)
    model = clf.model

    # NOTE(review): `eval` here must be a project-level scoring helper that
    # shadows the builtin (the builtin accepts at most three arguments) --
    # confirm where it is defined.
    score = eval(model, model.classes_, testing_X, testing_y)
    print(
        json.dumps({
            'model': model.__class__.__name__,
            'score': '%0.3f' % score
        }))
# Minimal usage example: fit a HungaBungaClassifier on the iris data set and
# run a prediction on the same samples.
from sklearn import datasets
from hunga_bunga import HungaBungaClassifier, HungaBungaRegressor

# Load the iris data and split it into features and targets.
iris = datasets.load_iris()
x = iris.data
y = iris.target

# Fit the classifier on the full data set.
clf = HungaBungaClassifier()
clf.fit(x, y)

# The prediction result is not stored; the call only demonstrates the API.
clf.predict(x)
# NOTE(review): the two statements below look like the tail of a loop whose
# header lies outside this excerpt -- confirm their indentation against the
# full file before applying.
a[c] = str(f)
c = c + 1
# print(a)

X = data[a]    # Features, indexed by the string labels stored in `a`
y = data['0']  # Target variable

# Split dataset into training set and test set: 75% training and 25% test.
# NOTE(review): assuming this is scikit-learn's train_test_split, X_c / y_c
# receive the training part and X_r / y_r the held-out part -- confirm.
X_c, X_r, y_c, y_r = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=1,
)

# iris = datasets.load_iris()
# X_c, y_c = iris.data, iris.target
# X_r, y_r = gen_reg_data(10, 3, 100, 3, sum, 0.3)

# ---------- Classification ----------
# NOTE(review): the classifier is fitted and then asked to predict on the same
# X_c; the X_r / y_r split is not used in this fragment -- confirm intent.
clf = HungaBungaClassifier()
clf.fit(X_c, y_c)
print(clf.predict(X_c))

# ---------- Regression ----------
# mdl = HungaBungaRegressor()
# mdl.fit(X_r, y_r)
# print(mdl.predict(X_c))
# Read the metadata workbook; only the barcode and source-type columns are
# used below.
fn_d = 'Enterobase.xlsx'
WS = pd.read_excel(fn_d)
sb = WS['strain_barcode']
st = WS['source_type']

# Build the label list: strip the '>' characters from each entry of g_id
# (in place) and take the first source type whose strain barcode equals it.
# NOTE(review): g_id and X are defined outside this excerpt; an entry with no
# matching barcode would make the [0] lookup fail -- confirm inputs.
y_ = []
for i in range(len(g_id)):
    g_id[i] = g_id[i].strip('>')
    y_.append(st[g_id[i] == sb].ravel()[0])

# Encode the labels numerically: 'Human' -> 0, 'Avian' -> 1.
# Any other source type is left untouched here and is then passed to the
# integer conversion as-is.
y = np.array(y_)
y[y == 'Human'] = 0
y[y == 'Avian'] = 1
y = y.astype('int')

## Filter Features
# Fit a 50-tree ExtraTreesClassifier and let SelectFromModel reduce X using
# the already-fitted estimator (prefit=True).
clf = ExtraTreesClassifier(n_estimators=50)
clf = clf.fit(X, y)
clf.feature_importances_  # bare attribute access; the value is not stored
selectmodel = SelectFromModel(clf, prefit=True)
X_new = selectmodel.transform(X)

## Hunga Bunga
# Rebind clf to a HungaBungaClassifier and fit it on the reduced features.
clf = HungaBungaClassifier()
clf.fit(X_new, y)