# Pull the target column out of the frame; everything else is a feature.
feature_frame = data.drop("label", axis=1)
X = feature_frame.values
y = data["label"].values

# Hold out 20% of the samples for evaluation; the fixed seed makes the
# split reproducible across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)
#%%
# My Algorithm
from RandomForest import RandomForestClassifier

# Project-local random-forest implementation, trained on the same split as
# the sklearn baseline below so their scores are comparable.
# NOTE(review): both nb_trees=200 and n_estimators=300 are passed — these look
# like two names for the same "number of trees" knob; check the RandomForest
# module to see which one is honored and drop the other.
model = RandomForestClassifier(nb_trees=200,
                               max_depth=50,
                               n_estimators=300,
                               max_workers=5)
model.fit(X_train, y_train)
# Score on the held-out 20% split (metric defined by the custom class).
print(model.score(X_test, y_test))

# Persist the fitted model for later reuse.
# NOTE(review): no `import joblib` is visible in this chunk — presumably
# imported earlier in the file; confirm.
joblib.dump(model, "data/my_random_forest.joblib")

#%%
# Baseline: scikit-learn's reference random forest, with hyper-parameters
# matched to the custom model above for a fair comparison.
from sklearn.ensemble import RandomForestClassifier as RFC

rf = RFC(max_depth=50,
         n_estimators=300,
         criterion='entropy',
         verbose=0,
         max_features="sqrt",
         # Fix the seed so the baseline score is reproducible — the
         # train/test split above is already pinned to random_state=42,
         # but the forest's own bootstrap/feature sampling was not.
         random_state=42)
rf.fit(X_train, y_train)
# Mean accuracy on the held-out test set.
print(rf.score(X_test, y_test))
    # NOTE(review): this fragment is truncated — the list literal closed on the
    # next line, and the enclosing function/loop header, begin before this
    # chunk; the code below cannot be safely restructured without seeing it.
    ]
    # One label per training row: column 2 of the multi-label matrix
    # (columns 0/1 presumably "anjuran"/"larangan" per the comments at the
    # bottom of the file — confirm).
    informasi = [
        labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0, 3)
        if y == 2
    ]
    # Binary-relevance style: build one single-label training set per label,
    # dropping the leading column of each row (presumably an id — confirm)
    # and appending that label as the final column.
    trainAnjuran = [trainf[x][1:] + [anjuran[x]] for x in range(len(trainf))]
    trainLarangan = [trainf[x][1:] + [larangan[x]] for x in range(len(trainf))]
    trainInformasi = [
        trainf[x][1:] + [informasi[x]] for x in range(len(trainf))
    ]
    # In[57]:
    # ## Build Model & Prediction
    # One independent forest per label (binary relevance).
    modelA = RandomForestClassifier(rf_trees=80, rf_samples=1000)
    modelL = RandomForestClassifier(rf_trees=80, rf_samples=1000)
    modelI = RandomForestClassifier(rf_trees=80, rf_samples=1000)
    modelA.fit(trainAnjuran)
    modelL.fit(trainLarangan)
    modelI.fit(trainInformasi)
    predictA = []
    predictL = []
    predictI = []
    for y in range(0, len(testf)):
        predictA.append(modelA.predict(testf[y][1:]))
        predictL.append(modelL.predict(testf[y][1:]))
        predictI.append(modelI.predict(testf[y][1:]))
    # Re-assemble the three per-label predictions into one multi-label row.
    prediction = [[predictA[x]] + [predictL[x]] + [predictI[x]]
                  for x in range(len(predictA))]
    hammingloss, index = processor.getHammingLoss(labeltest, prediction)
    performance.append(hammingloss)
    # NOTE(review): BUG — `index` was just rebound to getHammingLoss's second
    # return value, so `index.append(index)` appends that object to itself and
    # the per-fold accumulator is lost; should append to a separate list (the
    # same defect is fixed in the loop near the bottom of the file).
    index.append(index)

def split(dtrain, dtest):
    """Split raw train/test rows into feature rows and their labels.

    Each row is expected to end with two trailing columns, the last of
    which is the label: the features are row[:-2] and the label is row[-1].

    Args:
        dtrain: list of training rows (each a list).
        dtest: list of test rows with the same layout as dtrain.

    Returns:
        Tuple (train, test, labeltrain, labeltest) of four lists.
    """
    train = [row[:-2] for row in dtrain]
    test = [row[:-2] for row in dtest]
    labeltrain = [row[-1] for row in dtrain]
    # BUG FIX: test labels were read from row[-2] while train labels came
    # from row[-1]; both splits share the same row layout, so the test
    # label is also the last column.
    labeltest = [row[-1] for row in dtest]
    return train, test, labeltrain, labeltest


# Five-fold evaluation of the custom random forest.
index = []        # per-fold index output of getHammingLoss
performance = []  # per-fold Hamming loss
for fold in range(5):
    trainf, testf, labeltrain, labeltest = split(train[fold], test[fold])
    model = RandomForestClassifier(rf_trees=80, rf_samples=1000)
    # Drop the leading column of each row (presumably an id — confirm) and
    # append the label so every training row ends with its target.
    trainfix = [
        row[1:] + [label] for row, label in zip(trainf, labeltrain)
    ]
    model.fit(trainfix)
    prediction = [model.predict(row[1:]) for row in testf]
    # BUG FIX: the original rebound `index` to getHammingLoss's second
    # return value and then did `index.append(index)`, appending the fresh
    # object to itself and destroying the accumulator; keep the two names
    # separate.
    hammingloss, fold_index = processor.getHammingLoss(labeltest, prediction)
    index.append(fold_index)
    performance.append(hammingloss)
# ## Split Data
#train, validation, labeltrain, labelval = train_test_split(extraction, label, test_size=0.30, random_state=42)
#anjuran = [labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==0]
#larangan =[labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==1]
#informasi = [labeltrain[x][y] for x in range(len(labeltrain)) for y in range(0,3) if y==2]