def getTestAccuracyData(self):
    """Build NLTK testing sets for arguments and links from essays 80-89."""
    classification = Classification.Classification()
    for i in range(80, 90):
        doc = reader.documents["essay" + str(i)]
        dataObjectList = []
        annotatedData = set(doc.annotations)
        for annotation in annotatedData:
            dataObject = {"annotation": annotation.repr,
                          "labels": annotation.labels.items(),
                          "links": annotation.links}
            dataObjectList.append(dataObject)
        data = self.ExtractDataFeatures(dataObjectList, doc.key)

    preTrainingData = classification.prepareTrainingData(data)
    # arguments and links
    Arguments = preTrainingData[0]
    Links = preTrainingData[1]

    # word features for each task
    Arg_word_features = classification.getWordFeatures(Arguments)
    Link_word_features = classification.getWordFeatures(Links)

    classification.setWordfeatureSet(Arg_word_features)
    ArgumentTesting_set = nltk.classify.apply_features(classification.extract_features, Arguments)

    classification.setWordfeatureSet(Link_word_features)
    LinksTesting_set = nltk.classify.apply_features(classification.extract_features, Links)

    return [ArgumentTesting_set, LinksTesting_set]
def start(self):
    # perform some logging
    self.jlogger.info("Starting job with job id {}".format(self.job_id))
    self.jlogger.debug("Job Config: {}".format(self.config))
    self.jlogger.debug("Job Other Data: {}".format(self.job_data))

    try:
        # run the pipeline stages in order
        rud.ReadUserData(self)
        fg.FeatureGeneration(self, is_train=True)
        pp.Preprocessing(self, is_train=True)
        fs.FeatureSelection(self, is_train=True)
        fe.FeatureExtraction(self, is_train=True)
        clf.Classification(self)
        cv.CrossValidation(self)
        tsg.TestSetGeneration(self)
        tspp.TestSetPreprocessing(self)
        tsprd.TestSetPrediction(self)

        job_success_status = True
    except Exception:
        job_success_status = False
        helper.update_running_job_status(self.job_id, "Errored")
        self.jlogger.exception("Exception occurred in ML Job {}".format(self.job_id))

    return job_success_status
def calculate_classification(self):
    results = Classification()
    for i in range(self.number_cross_validation):
        train_data, test_data = self.split_data_set(i)
        classified_data = self.__classify(train_data, test_data)
        result = ClassificationUnit(train_data, test_data, classified_data)
        results.add_unit(result)
    return results
def getFunction(mlalgorithm, pDict):
    """Get the fitting function corresponding to the algorithm passed as parameter."""
    # supported algorithm names
    algList = ["SVM", "RandomForest", "DecisionTree", "LogisticRegression", "KNearstNeighbors"]
    cl = Classification(pDict)
    options = {
        "SVM": cl.fitSVM,
        "RandomForest": cl.fitRandomForest,
        "DecisionTree": cl.fitDecisionTree,
        "LogisticRegression": cl.fitLogisticRegression,
        "KNearstNeighbors": cl.fitKNearNeighbors,
    }
    return options[mlalgorithm]
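A hypothetical usage of getFunction follows; the parameter dictionary keys and the no-argument call on the returned method are assumptions, not taken from the source.

# Hypothetical usage; the parameter keys and the no-argument call are assumptions.
params = {"kernel": "rbf", "C": 1.0}
fit = getFunction("SVM", params)  # returns the bound method cl.fitSVM without calling it
model = fit()                     # invoking it performs the actual fitting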
def __init__(self):
    self.primary_id = None
    self.other_ids = []
    self.name = ''
    self.description = ''
    self.indication = ''
    self.pharmacodynamics = ''
    self.classification = Classification()
    self.synonyms = []
    self.international_brands = []
    self.categories = []
    self.sequences = []
    self.molecular_weight = ''
    self.molecular_formula = ''
    self.pathways_drugs = []
    self.pathways_enzymes = []
    self.atc_codes = []
import pandas as pd
from sklearn.model_selection import train_test_split

"""
Pre-processing data
return: a csv file
"""
input_file = input("Enter input file: ")
output_file = input("Enter output file: ")
data = Preprocessing(output_file)
data.preprocessing(input_file)

"""
Drop the label column
Split the output file into a training set and a test set
"""
data = pd.read_csv(output_file)
X = data.drop(['RainTomorrow'], axis=1)
y = data['RainTomorrow']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0)

"""
Classification
"""
clf = Classification(X_train, X_test, y_train, y_test)
clf.logistic_regression()
clf.random_forest()
clf.support_vector_machine()
clf.decisiontree()
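For comparison, here is a minimal sketch of what such a Classification wrapper typically does with scikit-learn directly; the wrapper's internals are not shown in the source, so this is an assumption, reusing the X_train/X_test/y_train/y_test split from above.

# Assumed equivalent of the Classification wrapper, written against scikit-learn directly.
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.tree import DecisionTreeClassifier

models = [("LogisticRegression", LogisticRegression(max_iter=1000)),
          ("RandomForest", RandomForestClassifier(random_state=0)),
          ("SVM", SVC()),
          ("DecisionTree", DecisionTreeClassifier(random_state=0))]
for name, model in models:
    model.fit(X_train, y_train)               # train on the 80% split
    print(name, model.score(X_test, y_test))  # accuracy on the 20% hold-out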
import numpy as np
# loadMNIST, separatingClasses, logi_reg and clc are project-local modules,
# assumed to be imported elsewhere in this script.

X_train, Y_train, X_validation, Y_validation, X_test, Y_test = loadMNIST(
    '/home/felippe/Área de Trabalho/Felippe/Mestrado/'
    'Machine_Learning/DataBase/Computer_Vision/MNIST/')

# keep only the examples for a single digit (one-vs-rest setup)
digit = 0
X_train, Y_train = separatingClasses(5400, digit, X_train, Y_train)
X_validation, Y_validation = separatingClasses(540, digit, X_validation, Y_validation)
X_test, Y_test = separatingClasses(1000, digit, X_test, Y_test)

lr = logi_reg.LogisticRegression()
# X must be in the form (N_X, M)
lr.run((X_train.T) / 255, np.expand_dims(Y_train, axis=1).T)

cl = clc.Classification()
Y_pred = lr.predict((X_validation.T) / 255)

# Finding the best threshold on the validation set
threshold = np.linspace(0.1, 0.9, 9)
F1_best = -1
for i in threshold:
    Y_label_pred = cl.prob2Label(Y_pred, i)
    F1 = cl.F1_score(np.expand_dims(Y_validation, axis=1).T, Y_label_pred)
    if F1 > F1_best:
        best_threshold = i
        F1_best = F1

print('Best Threshold: %f' % best_threshold)
print('F1 Score in the Validation Set: %f' % F1_best)
import nltk
import pickle
import Classification
from RelationAnalyzer import RelationAnalyzer
from DataProcessing import DataProcess

dataProcess = DataProcess.DataProcess()
classification = Classification.Classification()

# Get the raw data that has already been processed
processedData = classification.getProcessedData()

# Prepare training data from the raw data
preTrainingData = classification.prepareTrainingData(processedData)

# arguments and links
Arguments = preTrainingData[0]
Links = preTrainingData[1]

# get word features from training data
Arg_word_features = classification.getWordFeatures(Arguments)
Link_word_features = classification.getWordFeatures(Links)


def Arg_Extract_features(document):
    document_words = set(document)
    features = {}
    for word in Arg_word_features:
        features['contains(%s)' % word] = (word in document_words)
    return features
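A minimal sketch of how Arg_Extract_features would typically be used to train and score an NLTK classifier; it assumes Arguments is a list of (token_list, label) pairs, the form nltk.classify.apply_features expects.

# Sketch only: assumes Arguments holds (token_list, label) pairs.
Arg_training_set = nltk.classify.apply_features(Arg_Extract_features, Arguments)
Arg_classifier = nltk.NaiveBayesClassifier.train(Arg_training_set)
print("Training accuracy:", nltk.classify.accuracy(Arg_classifier, Arg_training_set))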
def classify(pTestFile):
    """Perform the classification."""
    cl = Classification(pTestFile)
    cl.classify()
def nextWindow(self):
    self.window = QMainWindow()
    self.ui = Classification()
    self.ui.classification()
    self.close()