# Cross-validation script: fits a class-balanced RBF-kernel SVM on every
# stratified fold of the training data and predicts the held-out fold.
from collections import defaultdict

import numpy as np
from sklearn import preprocessing
from sklearn.metrics import (
    f1_score,
    precision_score,
    recall_score,
    accuracy_score,
)
from sklearn.model_selection import StratifiedKFold, KFold
# SVC is instantiated inside the fold loop; without this import the first
# iteration fails with NameError.
from sklearn.svm import SVC

from extract_drugs import getFeatureVectorAndLabels
from ruleBasedAfterClass import getRuleBasedClassification

# NOTE(review): FOLDS is not referenced in the visible code; the splitter
# below is built with a hard-coded n_splits=10. Confirm which is intended.
FOLDS = 8

# Keys looked up in the `features` mapping for each fold.
featureNames = [
    'sections', 'containsFutureWord', 'prevSentContainsFutureWord',
    'current_tense', 'prev_tense', 'temporalType',
    'polarity', 'position', 'modality', 'proximity', 'futureCount',
]

# Print numpy arrays in full instead of eliding the middle with "...".
np.set_printoptions(threshold=np.inf)

data_dir = "../data/training_data"
files, drugs, features, X, y = getFeatureVectorAndLabels(data_dir)

# NOTE(review): y_encoded is computed here, but the raw labels `y` are what
# the split and fit calls below use -- confirm that is deliberate.
labelEncoder = preprocessing.LabelEncoder()
y_encoded = labelEncoder.fit_transform(y)

skf = StratifiedKFold(n_splits=10)

# Accumulators; they are only initialised in the visible part of the script.
label1 = 0
label2 = 0
label3 = 0
average_accuracy_score = 0

for train, test in skf.split(X, y):
    # A fresh classifier per fold so that no state leaks between folds.
    # X[train] / y[train] index with the arrays of positions yielded by
    # split(); this assumes X and y are numpy arrays -- TODO confirm.
    SVMclassifier = SVC(kernel='rbf', class_weight='balanced',
                        C=100.0, gamma='auto')
    SVMclassifier.fit(X[train], y[train])
    y_SVM_pred = SVMclassifier.predict(X[test])

    features_test = defaultdict(list)
    for feature in featureNames:
        # The remainder of this loop body lies outside the visible excerpt.
        featureList = features[feature]
# Evaluation script: loads the cached SVM (or trains and caches one) and
# predicts labels for the test data.
import os
from collections import defaultdict

import numpy as np
from sklearn import preprocessing
from sklearn.svm import SVC

from extract_drugs import getFeatureVectorAndLabels
from ruleBasedAfterClass import getRuleBasedClassification

# NOTE(review): `load` and `dump` are not imported in the visible part of
# the file; presumably `from joblib import dump, load` -- confirm.

featureNames = [
    'sections', 'containsFutureWord', 'prevSentContainsFutureWord',
    'current_tense', 'prev_tense', 'temporalType',
    'polarity', 'position', 'modality', 'proximity', 'futureCount',
]

# NOTE(review): assumes the training set lives in ../data/training_data --
# confirm. The fallback training branch used to read `data_dir`, i.e. it
# fitted the model on the same test set that is predicted below.
train_data_dir = "../data/training_data"
data_dir = "../data/test_data"

# Print numpy arrays in full instead of eliding the middle with "...".
np.set_printoptions(threshold=np.inf)

if os.path.isfile('svmModelC.joblib'):
    # Reuse the previously fitted model.
    SVMclassifier = load('svmModelC.joblib')
else:
    # getFeatureVectorAndLabels is unpacked into five values further down,
    # so the former four-name unpacking here would raise ValueError.
    files_train, drugs_train, features_train, X_train, y_train = \
        getFeatureVectorAndLabels(train_data_dir)
    labelEncoder = preprocessing.LabelEncoder()
    y_train_encoded = labelEncoder.fit_transform(y_train)
    SVMclassifier = SVC(kernel='rbf', class_weight='balanced',
                        C=1000.0, gamma='auto')
    SVMclassifier.fit(X_train, y_train_encoded)
    # Cache the fitted model so later runs take the branch above.
    dump(SVMclassifier, 'svmModelC.joblib')

files, drugs, features, X, y = getFeatureVectorAndLabels(data_dir)

# NOTE(review): this encoder is fitted on the test labels only, and the
# encoder used for training is not persisted with the model. The integer
# codes in y_encoded match the model's predictions only if both label sets
# are identical -- confirm.
labelEncoder = preprocessing.LabelEncoder()
y_encoded = labelEncoder.fit_transform(y)
y_SVM_pred = SVMclassifier.predict(X)