def classify(self):
    """Train a feed-forward neural network classifier on the tokenized data.

    Tokenizes the raw input, performs an 80/20 train/test split, optionally
    rebalances the training set to avoid class skewness, then builds,
    compiles and fits a small Keras ``Sequential`` model, storing it on
    ``self.model``.
    """
    self.tokenize()
    self.X_train, self.X_test, self.Y_train, self.Y_test = train_test_split(
        self.X, self.Y, test_size=0.20, random_state=42)
    if self.avoid_skewness:
        # Decode one-hot training rows back to label names so the
        # unskewing helper can balance the class distribution.
        Y_train = np.argmax(self.Y_train, axis=1)
        Y_train = [self.labels_dict_rev[int(i)] for i in list(Y_train)]
        self.X_train, self.Y_train = BasicFunctions.getUnskewedSubset(
            self.X_train, self.Y_train, Y_train)
    self.X_train = np.array(self.X_train)
    self.Y_train = np.array(self.Y_train)
    self.printDataInformation()
    self.model = Sequential()
    # Single 500-unit hidden layer with ReLU activation
    # (NOTE: the old comment claimed sigmoid; the code uses 'relu').
    self.model.add(
        Dense(input_dim=self.X.shape[1], units=500, activation='relu'))
    # Output layer with softmax activation, one unit per class
    self.model.add(Dense(units=self.Y.shape[1], activation='softmax'))
    # Specify optimizer, loss and validation metric
    self.model.compile(loss='categorical_crossentropy',
                       optimizer='adam',
                       metrics=['accuracy'])
    # Train the model
    self.model.fit(self.X_train,
                   self.Y_train,
                   epochs=4,
                   batch_size=10,
                   validation_split=0.2)
def fit(self, X, y):
    """Memorize the most frequent class label in *y*.

    This is a majority-class baseline: ``X`` is accepted only for API
    compatibility with the other classifiers and is not used.
    Sets ``self.most_frequent_class``; leaves it untouched when *y* is
    empty (matching the original manual scan).
    """
    label_distribution = BasicFunctions.keyCounter(y)
    if label_distribution:
        # Pick the label with the highest count.  On ties the first label
        # in iteration order wins, exactly as the original strict-`>` scan
        # did.
        self.most_frequent_class = max(label_distribution,
                                       key=label_distribution.get)
def evaluate(self):
    """Run the trained model on the held-out test set and store metrics.

    Decodes both the model's one-hot predictions and the one-hot gold
    labels back to label names (overwriting ``self.Y_test`` with the
    decoded form), then computes accuracy/precision/recall/F1.
    """
    raw_predictions = self.model.predict(self.X_test)
    predicted_indices = np.argmax(raw_predictions, axis=1)
    self.Y_predicted = [
        self.labels_dict_rev[int(idx)] for idx in list(predicted_indices)
    ]
    gold_indices = np.argmax(self.Y_test, axis=1)
    self.Y_test = [
        self.labels_dict_rev[int(idx)] for idx in list(gold_indices)
    ]
    metrics = BasicFunctions.getMetrics(self.Y_test, self.Y_predicted,
                                        self.labels)
    self.accuracy, self.precision, self.recall, self.f1score = metrics
def printClassEvaluation(self):
    """Print a per-class breakdown of the latest predictions."""
    gold, predicted = self.Y_test, self.Y_predicted
    BasicFunctions.printClassEvaluation(gold, predicted, self.labels)
def printBasicEvaluation(self):
    """Print the overall accuracy/precision/recall/F1 summary."""
    BasicFunctions.printEvaluation(
        self.accuracy,
        self.precision,
        self.recall,
        self.f1score,
        "Basic Evaluation",
    )
def evaluate(self):
    """Predict on the test split and record the evaluation metrics."""
    predictions = self.classifier.predict(self.X_test)
    self.Y_predicted = predictions
    (self.accuracy, self.precision, self.recall,
     self.f1score) = BasicFunctions.getMetrics(self.Y_test,
                                               self.Y_predicted,
                                               self.labels)
default='Word Sense Disambiguation', help='word sense') parser.add_argument( '--avoid_skewness', type=bool, default=False, help= 'how to train the dataset, without skewness in the data or with skewness') parser.add_argument( '--kfold', type=int, default=1, help='Amount of Ks for cross validation, if cross validation.') args = parser.parse_args() predict_languages = BasicFunctions.getLanguages(args.predict_languages) data = data('./train/', './test_00/') data.collectXY(data_method=args.data_method) BasicFunctions.printStandardText(args.method, predict_languages, args.predict_label) labels = list(set(data.Y)) #BasicFunctions.printLabelDistribution(data.Y_train) words = list(set(data.words)) #print(data.X_train) #BasicFunctions.printLabelDistribution(data.words_train) if len(labels) > 1: #otherwise, there is nothing to train