예제 #1
0
def train_HOMER(trainX_embedded, trainY):
    """Fit a MEKA model that runs MULAN's HOMER with a LibSVM base learner.

    Both inputs are converted with ``sparse.lil_matrix`` before fitting.
    The MULAN option string is
    ``HOMER.BalancedClustering.<FLAGS.num_clusters>.BinaryRelevance`` and the
    LibSVM options ``-C`` / ``-G`` are taken from ``FLAGS.C`` / ``FLAGS.gamma``.

    Args:
        trainX_embedded: feature matrix; anything ``sparse.lil_matrix`` accepts.
        trainY: label matrix; anything ``sparse.lil_matrix`` accepts.

    Returns:
        The fitted ``Meka`` model.
    """
    trainX_embedded_sp = sparse.lil_matrix(trainX_embedded)
    trainY_sp = sparse.lil_matrix(trainY)

    model = Meka(
        meka_classifier="meka.classifiers.multilabel.MULAN -S HOMER.BalancedClustering.%d.BinaryRelevance" % FLAGS.num_clusters,
        # %f rather than %d: %d silently truncates fractional values
        # (a gamma of 0.01 would be passed to LibSVM as "-G 0").
        weka_classifier="weka.classifiers.functions.LibSVM -C %f -G %f" % (FLAGS.C, FLAGS.gamma),
        meka_classpath=download_meka(),  # obtained via download_meka # for PC
        java_command='java'  # path to java executable
    )
    model.fit(trainX_embedded_sp, trainY_sp)
    return model
예제 #2
0
def train_svm(trainX_embedded, trainY, mode=FLAGS.mode):
    """Fit a MEKA Binary Relevance model whose base learner depends on ``mode``.

    * ``'rbf'``    -- LibSVM with ``-M 5000 -Z`` plus ``FLAGS.C`` / ``FLAGS.gamma``
    * ``'linear'`` -- LibLINEAR with ``FLAGS.C``
    * anything else -- a notice is printed and LibSVM with ``-M 200`` plus
      ``FLAGS.C`` / ``FLAGS.gamma`` is used

    Args:
        trainX_embedded: training features, passed to ``Meka.fit`` as given.
        trainY: training labels, passed to ``Meka.fit`` as given.
        mode: base-learner selector described above.

    Returns:
        The fitted ``Meka`` model.
    """
    # Option references:
    #   https://weka.sourceforge.io/doc.stable/weka/classifiers/functions/LibSVM.html
    #   https://weka.sourceforge.io/doc.stable/weka/classifiers/functions/LibLINEAR.html
    if mode == 'rbf':
        base_learner = (
            "weka.classifiers.functions.LibSVM -M 5000 -Z -C %f -G %f"
            % (FLAGS.C, FLAGS.gamma))
    elif mode == 'linear':
        base_learner = "weka.classifiers.functions.LibLINEAR -C %f" % (FLAGS.C)
    else:
        print('kernal mode unrecognised: using rbf kernel')
        base_learner = (
            "weka.classifiers.functions.LibSVM -M 200 -C %f -G %f"
            % (FLAGS.C, FLAGS.gamma))

    # Only the WEKA base learner differs between modes; everything else is shared.
    svm_model = Meka(
        meka_classifier="meka.classifiers.multilabel.BR",  # Binary Relevance
        weka_classifier=base_learner,
        meka_classpath="/exports/eddie/scratch/hdong3/scikit_ml_learn_data/meka/meka-release-1.9.2/lib/",  # for eddie server
        java_command='java'  # path to java executable
    )
    svm_model.fit(trainX_embedded, trainY)
    return svm_model
예제 #3
0
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss
from skmultilearn.ext import Meka

# Synthetic multi-label data set, split into train and test parts.
X, y = make_multilabel_classification(sparse=True, return_indicator='sparse')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# Alternative configuration kept for reference:
#   meka_classifier = "meka.classifiers.multilabel.PCC"
#   weka_classifier = "weka.classifiers.functions.Logistic"
meka = Meka(
    meka_classifier="meka.classifiers.multilabel.LC",
    weka_classifier="weka.classifiers.bayes.NaiveBayes",
    meka_classpath="/home/loki/Downloads/meka-release-1.9.3-SNAPSHOT/lib/",
    java_command="/usr/bin/java")

try:
    meka.fit(X_train, y_train)
except Exception as e:
    print(e)
    # The first argument is only decoded when it really is bytes; calling
    # .decode on a str (or indexing empty args) would raise inside the handler.
    detail = e.args[0] if e.args else None
    if isinstance(detail, bytes):
        print(detail.decode('utf8'))
    # Predicting with a model whose fit failed is pointless, so surface the
    # original error instead of carrying on.
    raise

predictions = meka.predict(X_test)

# The score was previously computed and thrown away; print it.
print(hamming_loss(y_test, predictions))

print(predictions)
예제 #4
0
def class_multi_label(x, Y, model, wekamodelname, value):
    """Wrap a base learner in the multi-label strategy selected by ``value``.

    When ``Y`` is one-dimensional the problem is not treated as multi-label
    and ``model`` is returned unchanged.  Otherwise ``value`` (1-9) selects a
    strategy; any other value brings up an interactive menu that is repeated
    until a valid number is entered.

    Args:
        x: unused; kept so existing callers keep working.
        Y: label array; only its ``ndim`` attribute is read.
        model: base estimator handed to ``LabelPowerset`` / ``RakelD``.
        wekamodelname: WEKA classifier string used by the MEKA-backed
            strategies, or ``"nothing"`` when no WEKA counterpart exists.
        value: strategy number, 1-9.

    Returns:
        ``model`` for a 1-D ``Y``; otherwise the configured classifier, or
        ``0`` when the selected strategy could not be built.
    """
    # A 1-D label array is treated as "not multi-label".
    num_of_labels = Y.ndim
    print("\n\n-----------------------------------------------------------\n")
    if num_of_labels == 1:
        print("This is not a multi-label problem!!!!!!")
        return model
    javapath = "C:\\Program Files\\Java\\jre1.8.0_251\\bin\\javaw.exe"

    myclasspath = download_meka()
    print(myclasspath)

    # MEKA-backed strategies:
    #   choice -> (banner, MEKA classifier, suffix appended to the WEKA string)
    meka_strategies = {
        1: ("Applying binary relevance",
            "meka.classifiers.multilabel.BR", ""),
        2: ("Fourclass Pairwise",
            "meka.classifiers.multilabel.FW", ""),
        3: ("Applying calibrated label ranking",
            "meka.classifiers.multilabel.MULAN", " -S CLR"),
        4: ("Applying Chain Classifier",
            "meka.classifiers.multilabel.CC", ""),
        6: ("Applying powerset with pruning",
            "meka.classifiers.multilabel.PS", ""),
        8: ("Monte-Carlo Classifier Chains",
            "meka.classifiers.multilabel.MCC", ""),
    }

    # 0 is the "no classifier" sentinel; binding it up front guarantees the
    # final return never hits an unbound name.
    clf = 0
    try:
        while True:
            if value not in range(1, 10):
                print("This is a Multi label problem")
                print("Please select:")
                print("1. For binary relevance")
                print("2. For pairwise comparison")
                print("3. Calibrated label ranking")
                print("4. Chain classifier ")
                print("5. PowerSet no pruning ")
                print("6. PowerSet with pruning ")
                print("7. Random-k Labelsets ")
                print("8. Monte-Carlo Classifier Chains ")
                print("9. Multi Label knn ")
                # input() returns text on Python 3, so convert it before it is
                # compared against the integer choices.
                try:
                    value = int(input("Please enter a choice:\n"))
                except ValueError:
                    value = 0  # not a number: handled by "Try again!!!!" below

            if value in range(1, 10):
                value = int(value)

            if value in meka_strategies:
                banner, meka_name, weka_suffix = meka_strategies[value]
                print(banner)
                if wekamodelname == "nothing":
                    print("WEKA does not support this classifier")
                    break
                clf = Meka(
                    meka_classifier=meka_name,
                    weka_classifier=wekamodelname + weka_suffix,
                    meka_classpath=myclasspath,
                    java_command=javapath  # path to java executable
                )
                break
            elif value == 5:
                print("Applying powerset NO pruning")
                clf = LabelPowerset(classifier=model,
                                    require_dense=[False, True])
                break
            elif value == 7:
                print("Applying Random-k Labelsets")
                try:
                    clf = RakelD(base_classifier=model,
                                 base_classifier_require_dense=[False, True],
                                 labelset_size=4)
                except Exception:
                    print("RakelD  exception")
                break
            elif value == 9:
                print("Applying Multilabel k Nearest Neighbours")
                try:
                    clf = MLkNN(k=3)
                except Exception:
                    print("Multilabel k Nearest Neighbours exception")
                break
            else:
                print("Try again!!!!")
    except Exception as exc:
        # Best-effort by design, but Ctrl-C / SystemExit now propagate and
        # the cause is reported instead of being hidden.
        print("\nSomething went wrong, but continue\n")
        print(exc)
    return clf
예제 #5
0
from sklearn.metrics import hamming_loss
from skmultilearn.ext import Meka
import os

home = os.environ['HOME']
print(home)

# Earlier runs loaded project CSV data instead of the synthetic set below:
# wrd = pd.read_csv('/mnt/d/Documents/uni/The Final Project/TheSuperQuestionTyper/Primary_data/1000word_vector_Q.csv')
# tpc = pd.read_csv('/mnt/d/Documents/uni/The Final Project/TheSuperQuestionTyper/Primary_data/topic_vector_Q.csv')
#
# tpc.drop('tpc41', axis=1, inplace=True)
# tpc.drop('tpc42', axis=1, inplace=True)
# X = wrd.values
# y = tpc.values
# NOTE(review): make_multilabel_classification and train_test_split are not
# among the imports visible for this example -- confirm they are in scope.
X, y = make_multilabel_classification(sparse=True, return_indicator='sparse')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

meka = Meka(meka_classifier="meka.classifiers.multilabel.LC",
            weka_classifier="weka.classifiers.bayes.NaiveBayes",
            meka_classpath="/opt/meka-1.9/lib/",
            java_command='/usr/bin/java')
meka.fit(X_train, y_train)

print(X_test[15])
try:
    predictions = meka.predict(X_test)
    # The score was previously computed and thrown away; print it.
    print(hamming_loss(y_test, predictions))
except Exception as exc:
    # Narrowed from a bare except so Ctrl-C / SystemExit still propagate.
    # Fall back to the exception itself when the wrapper has no ``error``
    # attribute, instead of raising AttributeError inside the handler.
    print(getattr(meka, 'error', exc))
예제 #6
0
 def classifier(self):
     """Return a ``Meka`` wrapper using MEKA's BR classifier over WEKA's ZeroR."""
     return Meka(
         meka_classifier="meka.classifiers.multilabel.BR",
         weka_classifier="weka.classifiers.rules.ZeroR",
     )
예제 #7
0
# -*- coding:utf-8 -*-

from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import hamming_loss
from skmultilearn.ext import Meka

from skmultilearn.adapt import MLkNN

# Synthetic multi-label data set, split into train and test parts.
X, y = make_multilabel_classification(sparse=True, return_indicator='sparse')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

# No weka_classifier is passed here, so the wrapper's default applies.
meka = Meka(meka_classifier="meka.classifiers.multilabel.LC",
            meka_classpath="F:\\mekalib\\",
            java_command="d:\\Program Files\\Java/jdk1.8.0_60\\jre\\bin\\java")

meka.fit(X_train, y_train)

predictions = meka.predict(X_test)

# The score was previously computed and thrown away, leaving the script with
# no output at all; print it.
print(hamming_loss(y_test, predictions))