Esempio n. 1
0
from sklearn import preprocessing
from extract_drugs import getFeatureVectorAndLabels
from sklearn.model_selection import StratifiedKFold, KFold
import numpy as np
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score
from ruleBasedAfterClass import getRuleBasedClassification
from collections import defaultdict

# Fold-count constant.
# NOTE(review): the splitter built further down uses n_splits=10, not FOLDS;
# confirm which of the two values is the intended one.
FOLDS = 8

# Names used as keys into `features` inside the cross-validation loop.
featureNames = [
    'sections',
    'containsFutureWord',
    'prevSentContainsFutureWord',
    'current_tense',
    'prev_tense',
    'temporalType',
    'polarity',
    'position',
    'modality',
    'proximity',
    'futureCount',
]

# Print numpy arrays in full rather than summarising long ones with "...".
np.set_printoptions(threshold=np.inf)

# Extract everything from the training directory; the helper returns five
# values, unpacked here as files, drugs, features, X and y.
data_dir = "../data/training_data"
files, drugs, features, X, y = getFeatureVectorAndLabels(data_dir)

# Integer-encode the labels.
# NOTE(review): the visible part of the loop below indexes `y` directly and
# never reads `y_encoded`; confirm whether the encoded labels were intended.
labelEncoder = preprocessing.LabelEncoder()
y_encoded = labelEncoder.fit_transform(y)

# Stratified splitter used to drive the cross-validation loop.
skf = StratifiedKFold(n_splits=10)

# Zero-initialised accumulators (presumably per-label tallies and a running
# accuracy sum updated by later code; verify against the rest of the script).
label1 = label2 = label3 = 0
average_accuracy_score = 0
# Train and evaluate one RBF-kernel SVM per stratified fold.
# NOTE(review): `SVC` is not imported in the visible part of this file;
# confirm that `from sklearn.svm import SVC` exists elsewhere.
# NOTE(review): X and y are indexed with the fold index arrays, which assumes
# both are numpy arrays (not plain lists) — confirm against
# getFeatureVectorAndLabels.
for train, test in skf.split(X, y):
    # A fresh classifier is built for every fold so no state carries over.
    SVMclassifier = SVC(kernel = 'rbf',  class_weight='balanced', C = 100.0, gamma = 'auto')
    # Fit on this fold's training rows using the raw labels in `y`.
    SVMclassifier.fit(X[train], y[train])
    # Predict labels for the held-out rows of this fold.
    y_SVM_pred = SVMclassifier.predict(X[test])
    # Per-fold mapping from feature name to a list of values.
    features_test = defaultdict(list)
    for feature in featureNames:
        # Look up the full list of values recorded for this feature.
        featureList = features[feature]
        # NOTE(review): the loop body appears to be cut off here; neither
        # `featureList` nor `features_test` is used in the visible code.
from extract_drugs import getFeatureVectorAndLabels
from ruleBasedAfterClass import getRuleBasedClassification
from collections import defaultdict

# Names of the features this script refers to.
featureNames = [
    'sections',
    'containsFutureWord',
    'prevSentContainsFutureWord',
    'current_tense',
    'prev_tense',
    'temporalType',
    'polarity',
    'position',
    'modality',
    'proximity',
    'futureCount',
]

# Print numpy arrays in full rather than summarising long ones with "...".
np.set_printoptions(threshold=np.inf)

# Directory holding the data this script evaluates on.
data_dir = "../data/test_data"

# Reuse the persisted classifier when it exists; otherwise train one and
# persist it so later runs can skip training.
model_path = 'svmModelC.joblib'
if os.path.isfile(model_path):
    SVMclassifier = load(model_path)
else:
    # Train on the training split, not on `data_dir`: `data_dir` points at the
    # test data that is scored below, so fitting on it would leak the
    # evaluation set into the model.
    # NOTE(review): the path mirrors the one used by the cross-validation
    # script in this project; confirm it is the intended training directory.
    train_data_dir = "../data/training_data"
    # getFeatureVectorAndLabels returns five values (same shape as the
    # evaluation call below); unpacking only four raised ValueError here.
    files_train, drugs_train, features_train, X_train, y_train = \
        getFeatureVectorAndLabels(train_data_dir)
    labelEncoder = preprocessing.LabelEncoder()
    y_train_encoded = labelEncoder.fit_transform(y_train)
    SVMclassifier = SVC(kernel='rbf',
                        class_weight='balanced',
                        C=1000.0,
                        gamma='auto')
    SVMclassifier.fit(X_train, y_train_encoded)
    dump(SVMclassifier, model_path)

# Extract the evaluation data.
files, drugs, features, X, y = getFeatureVectorAndLabels(data_dir)

# Encode the evaluation labels as integers.
# NOTE(review): this encoder is fitted independently of the one used at
# training time (which is not persisted with the model); the integer codes
# only line up if both label sets contain the same classes — confirm.
labelEncoder = preprocessing.LabelEncoder()
y_encoded = labelEncoder.fit_transform(y)

# Predict a label for every evaluation sample.
y_SVM_pred = SVMclassifier.predict(X)