def classify(self):
        """Prepare the data, then build and train a small feed-forward network.

        Steps, in order:

        1. ``self.tokenize()`` is called (expected to populate ``self.X`` and
           ``self.Y`` -- TODO confirm, it is defined outside this block).
        2. ``self.X`` / ``self.Y`` are split 80/20 into train and test parts
           with a fixed ``random_state`` so the split is reproducible.
        3. When ``self.avoid_skewness`` is truthy, the training rows are
           replaced by whatever ``BasicFunctions.getUnskewedSubset`` returns
           (presumably a class-balanced subset -- verify against the helper).
        4. A two-layer dense network is compiled and fitted on the training
           part; the result is kept in ``self.model``.
        """
        self.tokenize()

        split = train_test_split(
            self.X, self.Y, test_size=0.20, random_state=42)
        self.X_train, self.X_test, self.Y_train, self.Y_test = split

        if self.avoid_skewness:
            # Row-wise argmax turns each target row into a class index, which
            # is then mapped back to its label name for the subset helper.
            train_label_names = [
                self.labels_dict_rev[int(class_idx)]
                for class_idx in np.argmax(self.Y_train, axis=1)
            ]
            subset_X, subset_Y = BasicFunctions.getUnskewedSubset(
                self.X_train, self.Y_train, train_label_names)
            self.X_train = np.array(subset_X)
            self.Y_train = np.array(subset_Y)

        self.printDataInformation()

        self.model = Sequential()

        # Single 500-unit hidden layer with ReLU activation.
        hidden_layer = Dense(
            input_dim=self.X.shape[1], units=500, activation='relu')
        self.model.add(hidden_layer)

        # Softmax output layer, one unit per column of the target matrix.
        output_layer = Dense(units=self.Y.shape[1], activation='softmax')
        self.model.add(output_layer)

        # Optimizer, loss and the metric reported during training.
        compile_options = {
            'loss': 'categorical_crossentropy',
            'optimizer': 'adam',
            'metrics': ['accuracy'],
        }
        self.model.compile(**compile_options)

        # Train; 20% of the training data is held out for validation.
        fit_options = {
            'epochs': 4,
            'batch_size': 10,
            'validation_split': 0.2,
        }
        self.model.fit(self.X_train, self.Y_train, **fit_options)
Exemple #2
0
 def fit(self, X, y):
     """Remember the most frequent label in ``y`` as the baseline class.

     ``X`` is accepted but never read. The label counts come from
     ``BasicFunctions.keyCounter(y)``, which is iterated and then indexed
     by each yielded label (presumably a label -> count mapping; verify
     against the helper). The winner is stored in
     ``self.most_frequent_class``; nothing is returned.
     """
     counts = BasicFunctions.keyCounter(y)
     best_count = 0
     for candidate in counts:
         candidate_count = counts[candidate]
         # Skip anything that does not beat the current best, except while
         # the best is still 0 (no positive count has been recorded yet).
         if not (candidate_count > best_count or best_count == 0):
             continue
         best_count = candidate_count
         self.most_frequent_class = candidate
    def evaluate(self):
        """Predict on the test set and store the evaluation metrics.

        Both the model output and ``self.Y_test`` are reduced with a
        row-wise argmax and mapped to label names via
        ``self.labels_dict_rev``. ``self.Y_test`` is overwritten with those
        label names. The four values returned by
        ``BasicFunctions.getMetrics`` are stored as ``self.accuracy``,
        ``self.precision``, ``self.recall`` and ``self.f1score``.
        """

        def _as_label_names(score_rows):
            # Index of the largest entry per row -> label name.
            winners = np.argmax(score_rows, axis=1)
            return [self.labels_dict_rev[int(winner)] for winner in winners]

        self.Y_predicted = _as_label_names(self.model.predict(self.X_test))
        self.Y_test = _as_label_names(self.Y_test)

        scores = BasicFunctions.getMetrics(
            self.Y_test, self.Y_predicted, self.labels)
        self.accuracy, self.precision, self.recall, self.f1score = scores
 def printClassEvaluation(self):
     """Hand the gold labels, predictions and label set to
     ``BasicFunctions.printClassEvaluation``.
     """
     report = BasicFunctions.printClassEvaluation
     report(self.Y_test, self.Y_predicted, self.labels)
 def printBasicEvaluation(self):
     """Pass the stored accuracy, precision, recall and F1 score to
     ``BasicFunctions.printEvaluation`` under the title "Basic Evaluation".
     """
     report = BasicFunctions.printEvaluation
     report(self.accuracy, self.precision, self.recall, self.f1score,
            "Basic Evaluation")
    def evaluate(self):
        """Predict on ``self.X_test`` with ``self.classifier`` and store the
        metrics.

        Predictions are kept in ``self.Y_predicted``; the four values
        returned by ``BasicFunctions.getMetrics`` are stored as
        ``self.accuracy``, ``self.precision``, ``self.recall`` and
        ``self.f1score``.
        """
        predictions = self.classifier.predict(self.X_test)
        self.Y_predicted = predictions

        scores = BasicFunctions.getMetrics(
            self.Y_test, self.Y_predicted, self.labels)
        self.accuracy, self.precision, self.recall, self.f1score = scores
Exemple #7
0
                    default='Word Sense Disambiguation',
                    help='word sense')
def _str_to_bool(value):
    """Convert a command-line string into a real boolean.

    ``type=bool`` must not be used with argparse: ``bool()`` returns True
    for every non-empty string, so ``--avoid_skewness False`` would enable
    the option. This converter accepts the usual spellings instead.

    Args:
        value: The raw argument string (an actual bool is passed through).

    Returns:
        True or False.

    Raises:
        argparse.ArgumentTypeError: If the text is not a recognised boolean;
            argparse turns this into a normal usage error.
    """
    import argparse  # local import keeps this block self-contained

    if isinstance(value, bool):
        return value
    text = value.strip().lower()
    if text in ('true', 't', 'yes', 'y', 'on', '1'):
        return True
    # The empty string stays False, as it was under ``type=bool``.
    if text in ('false', 'f', 'no', 'n', 'off', '0', ''):
        return False
    raise argparse.ArgumentTypeError(
        'expected a boolean value (true/false), got %r' % (value,))


parser.add_argument(
    '--avoid_skewness',
    type=_str_to_bool,
    default=False,
    help=
    'how to train the dataset, without skewness in the data or with skewness')
# Number of cross-validation folds; defaults to 1.
parser.add_argument(
    '--kfold',
    type=int,
    default=1,
    help='Amount of Ks for cross validation, if cross validation.')
# Parse the command line (reads sys.argv by default).
args = parser.parse_args()

# Resolve the --predict_languages value through the project helper.
predict_languages = BasicFunctions.getLanguages(args.predict_languages)

# NOTE: this rebinds the name `data` from the callable to the object it
# returns, so the callable is no longer reachable under this name afterwards.
# The two paths are presumably the train and test directories -- TODO confirm.
data = data('./train/', './test_00/')
data.collectXY(data_method=args.data_method)

BasicFunctions.printStandardText(args.method, predict_languages,
                                 args.predict_label)
# Distinct labels; going through set() drops duplicates and leaves the
# resulting list order unspecified.
labels = list(set(data.Y))
#BasicFunctions.printLabelDistribution(data.Y_train)

# Distinct entries of data.words, deduplicated the same way as the labels.
words = list(set(data.words))
#print(data.X_train)
#BasicFunctions.printLabelDistribution(data.words_train)

if len(labels) > 1:  #otherwise, there is nothing to train