# ---------------------------------------------------------------------------
# Train/test split, then compare the custom random forest against sklearn's.
# ---------------------------------------------------------------------------

# Separate the feature matrix from the target column.
X = data.drop("label", axis=1).values
y = data["label"].values

# Hold out 20% of the rows for evaluation; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

#%%
# My Algorithm
from RandomForest import RandomForestClassifier

model = RandomForestClassifier(
    nb_trees=200,
    max_depth=50,
    n_estimators=300,
    max_workers=5,
)
model.fit(X_train, y_train)
print(model.score(X_test, y_test))

# Persist the fitted custom model for later reuse.
joblib.dump(model, "data/my_random_forest.joblib")

#%%
# Baseline: scikit-learn's reference implementation with matching settings.
from sklearn.ensemble import RandomForestClassifier as RFC

rf = RFC(
    max_depth=50,
    n_estimators=300,
    criterion='entropy',
    verbose=0,
    max_features="sqrt",
)
rf.fit(X_train, y_train)
print(rf.score(X_test, y_test))
] informasi = [ labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0, 3) if y == 2 ] trainAnjuran = [trainf[x][1:] + [anjuran[x]] for x in range(len(trainf))] trainLarangan = [trainf[x][1:] + [larangan[x]] for x in range(len(trainf))] trainInformasi = [ trainf[x][1:] + [informasi[x]] for x in range(len(trainf)) ] # In[57]: # ## Build Model & Prediction modelA = RandomForestClassifier(rf_trees=80, rf_samples=1000) modelL = RandomForestClassifier(rf_trees=80, rf_samples=1000) modelI = RandomForestClassifier(rf_trees=80, rf_samples=1000) modelA.fit(trainAnjuran) modelL.fit(trainLarangan) modelI.fit(trainInformasi) predictA = [] predictL = [] predictI = [] for y in range(0, len(testf)): predictA.append(modelA.predict(testf[y][1:])) predictL.append(modelL.predict(testf[y][1:])) predictI.append(modelI.predict(testf[y][1:])) prediction = [[predictA[x]] + [predictL[x]] + [predictI[x]] for x in range(len(predictA))] hammingloss, index = processor.getHammingLoss(labeltest, prediction) performance.append(hammingloss) index.append(index)
def split(dtrain, dtest):
    """Split raw fold rows into feature lists and label lists.

    Features keep all but the last two elements of each row; labels are
    taken from the end of each row.

    NOTE(review): ``labeltrain`` reads index ``-1`` while ``labeltest``
    reads ``-2`` -- the asymmetry looks suspicious but is preserved here;
    confirm against the data layout before changing it.
    """
    train = [dtrain[x][0:-2] for x in range(len(dtrain))]
    test = [dtest[x][0:-2] for x in range(len(dtest))]
    labeltrain = [dtrain[x][-1] for x in range(len(dtrain))]
    labeltest = [dtest[x][-2] for x in range(len(dtest))]
    return train, test, labeltrain, labeltest


index = []
performance = []
# 5-fold evaluation: fit one model per fold and collect its hamming loss.
for x in range(5):
    trainf, testf, labeltrain, labeltest = split(train[x], test[x])
    model = RandomForestClassifier(rf_trees=80, rf_samples=1000)
    # Drop the leading element of each feature row (presumably an id column
    # -- TODO confirm) and append the fold's label to build training rows.
    trainfix = [
        trainf[i][1:] + [labeltrain[i]] for i in range(len(labeltrain))
    ]
    model.fit(trainfix)
    prediction = []
    for y in range(0, len(testf)):
        prediction.append(model.predict(testf[y][1:]))
    # BUG FIX: the original unpacked into `index`
    # (`hammingloss, index = ...`), rebinding the accumulator list to this
    # fold's result, and then did `index.append(index)` -- appending the
    # result to itself. Unpack into a separate name instead.
    hammingloss, fold_index = processor.getHammingLoss(labeltest, prediction)
    index.append(fold_index)
    performance.append(hammingloss)

# ## Split Data
#train, validation, labeltrain, labelval = train_test_split(extraction, label, test_size=0.30, random_state=42)
#anjuran = [labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==0]
#larangan =[labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==1]
#informasi = [labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==2]