Beispiel #1
0
    #def get_
from sklearn.tree import DecisionTreeClassifier
# Load the full corpus and the statistics computed from it.
train = corpus.load_corpus(all=True)
statistic = analytics.load_analytics(train)

# For each key of the statistics, keep the candidate whose value is largest.
heighest_probabilty = {
    entry: max(statistic[entry].items(), key=lambda pair: pair[1])[0]
    for entry in statistic
}

X_train_raw, Y_train_raw = extract_feature(data=train)

# The global encoders are built by set_encoder from the raw labels and are
# handed to encode_features to produce the classifier's training input.
global_label_encoder, global_hot_encoder = set_encoder(Y_train_raw)
print("Training Global Classifer ....")
X_train, Y_train = encode_features(
    X_train_raw,
    Y_train_raw,
    global_label_encoder,
    global_hot_encoder,
)
global_clf = DecisionTreeClassifier()
global_clf.fit(X_train, Y_train)
print("Completed")

# print(train)
# Identify the ambiguity classes.
# amb_class starts out empty; nothing in the visible part of this loop fills it.
amb_class = {}
for i in train:
    # x is the position inside the sequence i and y is the element at that
    # position; y[0] is the lookup key into statistic (presumably the word --
    # TODO confirm against the corpus format).
    for x,y in enumerate(i):
        #If the word only has one tagging, we don't need a classifier
        if len(statistic[y[0]]) == 1:
            pass
        #If there is an ambiguity, we need a decision tree classifier

        else:
            # NOTE(review): the body of this branch is missing from this excerpt.
from  features import extract_feature, set_encoder,encode_features
from  corpus import  load_corpus

from sklearn.tree import  DecisionTreeClassifier

# Raw training features and labels come from the default corpus selection.
X_train_raw, Y_train_raw = extract_feature(data=load_corpus())

# The encoders are created once from the training labels and reused below for
# both the training data and the evaluation data.
label_encoder, hot_encoder = set_encoder(Y_train_raw)
X_train, Y_train = encode_features(
    X_train_raw, Y_train_raw, label_encoder, hot_encoder
)

# fit() returns the estimator itself, so construction and training are chained.
clf = DecisionTreeClassifier().fit(X_train, Y_train)

# Score the classifier on the corpus selected with last=True
# (presumably a held-out portion -- confirm in corpus.load_corpus).
X_test_raw, Y_test_raw = extract_feature(load_corpus(last=True))
X_test, Y_test = encode_features(
    X_test_raw, Y_test_raw, label_encoder, hot_encoder
)
print(clf.score(X_test, Y_test))