Example #1
0
    def train(self, dataset_path, features=None, target=None, **kwargs):
        """Fit a classifier on the CSV file at ``dataset_path`` and log train accuracy.

        ``features`` and ``target`` are stored on the instance as ``_features``
        and ``_target`` (presumably read back by ``self._extract_xy`` -- confirm).
        Extra keyword arguments are accepted but not used in this method.
        """
        # Remember the requested columns on the instance
        self._features, self._target = features, target

        # Read the CSV into a dataframe, then split it into inputs and labels
        frame = pd.read_csv(dataset_path)
        inputs, labels = self._extract_xy(frame)

        # Encode categorical features
        inputs = self._encoding_categorical_type(inputs)

        # Number of distinct label values
        n_classes = labels.unique().size

        self._clf = self._build_classifier(
            self.n_estimators,
            self.min_child_weight,
            self.max_depth,
            self.gamma,
            self.subsample,
            self.colsample_bytree,
            n_classes,
        )
        self._clf.fit(inputs, labels)

        # Score the fitted classifier on the same data it was trained on
        train_score = self._clf.score(inputs, labels)
        logger.log('Train accuracy: {}'.format(train_score))
Example #2
0
 def train(self, dataset_uri):
     """Load the corpus at ``dataset_uri`` and compute transition/emission probabilities.

     The results of ``self._compute_probs`` are stored in ``self._trans_probs``
     and ``self._emiss_probs``; the tag count is logged at the end.
     """
     corpus = dataset_utils.load_dataset_of_corpus(dataset_uri)

     # Transpose each sentence, then transpose the list of sentences, giving
     # one sequence of per-sentence tokens and one of per-sentence tags.
     transposed_sents = [zip(*sent) for sent in corpus]
     sents_tokens, sents_tags = zip(*transposed_sents)

     self._num_tags = corpus.tag_num_classes[0]
     probs = self._compute_probs(self._num_tags, sents_tokens, sents_tags)
     (self._trans_probs, self._emiss_probs) = probs
     logger.log('No. of tags: {}'.format(self._num_tags))
Example #3
0
    def train(self, dataset_uri):
        """Fit ``self._clf`` on the image dataset at ``dataset_uri`` and log train accuracy."""
        samples = dataset_utils.load_dataset_of_image_files(dataset_uri)

        # Split the (image, class) pairs into two parallel tuples
        pairs = [(image, image_class) for (image, image_class) in samples]
        images, y = zip(*pairs)

        X = self._prepare_X(images)
        self._clf.fit(X, y)

        # Accuracy on the training data: fraction of predictions equal to the labels
        predictions = self._clf.predict(X)
        n_correct = sum(y == predictions)
        accuracy = n_correct / len(y)
        logger.log('Train accuracy: {}'.format(accuracy))
Example #4
0
    def evaluate(self, dataset_uri):
        """Evaluate ``self._model`` on the image dataset at ``dataset_uri``.

        Images are loaded with both sides set to the ``image_size`` knob. The
        loss is logged and the accuracy reported by ``self._model.evaluate``
        is returned.
        """
        side = self._knobs.get('image_size')
        samples = dataset_utils.load_dataset_of_image_files(dataset_uri, image_size=[side, side])

        # Split the (image, class) pairs and convert each side to an array
        image_seq, class_seq = zip(*[(image, image_class) for (image, image_class) in samples])
        images = np.asarray(image_seq)
        classes = np.asarray(class_seq)

        # Run inside the graph and session owned by this instance
        with self._graph.as_default(), self._sess.as_default():
            loss, accuracy = self._model.evaluate(images, classes)
            logger.log('Test loss: {}'.format(loss))

        return accuracy
Example #5
0
    def _train(self, dataset):
        """Train a newly created model on ``dataset`` and return ``(net, optimizer)``.

        Batch size and epoch count come from the ``batch_size`` and ``epochs``
        knobs. The mean per-batch loss of each epoch is reported through
        ``logger.log_loss``.
        """
        batch_size = self._knobs.get('batch_size')
        num_epochs = self._knobs.get('epochs')
        # Tag index the loss ignores (from padding of sentences during batching)
        pad_tag = self._tag_count
        num_batches = math.ceil(len(dataset) / batch_size)

        # Define 2 plots: Loss against time, loss against epochs
        logger.define_loss_plot()
        logger.define_plot('Loss Over Time', ['loss'])

        net, optimizer = self._create_model()

        # Use the GPU, and GPU index tensors, when CUDA is available
        use_cuda = torch.cuda.is_available()
        if use_cuda:
            logger.log('Using CUDA...')
            net = net.cuda()
        Tensor = torch.cuda.LongTensor if use_cuda else torch.LongTensor

        criterion = nn.CrossEntropyLoss(ignore_index=pad_tag)

        for epoch in range(num_epochs):
            epoch_loss = 0
            for batch_idx in range(num_batches):
                # Slice bounds of this batch within the dataset
                start = batch_idx * batch_size
                stop = start + batch_size
                words_tsr, tags_tsr = self._prepare_batch(dataset, start, stop, Tensor)

                # Clear gradients left over from the previous batch
                optimizer.zero_grad()

                # Forward pass
                probs_tsr = net(words_tsr)

                # Merge the first two dims of the output so every word position
                # becomes one row for the loss
                flat_rows = probs_tsr.size(0) * probs_tsr.size(1)
                loss = criterion(probs_tsr.view(flat_rows, -1), tags_tsr.view(-1))

                # Backward pass, then let the optimizer update the parameters
                loss.backward()
                optimizer.step()

                epoch_loss += loss.item()

            logger.log_loss(loss=(epoch_loss / num_batches), epoch=epoch)

        return (net, optimizer)
Example #6
0
    def train(self, dataset_path, features=None, target=None):
        """Fit ``self._clf`` on the tabular dataset at ``dataset_path`` and log train RMSE.

        When ``features`` is None every column except the last is used as input;
        when ``target`` is None the last column is used as the target.
        """
        table = dataset_utils.load_dataset_of_tabular(dataset_path).data

        # Fall back to positional column selection when no names are given
        X = table.iloc[:, :-1] if features is None else table[features]
        y = table.iloc[:, -1] if target is None else table[target]

        # Encode categorical features
        X = self._category_encoding_type(X, y)
        self._clf.fit(X, y)

        # Root mean square error on the training data
        train_preds = self._clf.predict(X)
        mse = mean_squared_error(y, train_preds)
        rmse = np.sqrt(mse)
        logger.log('Train RMSE: {}'.format(rmse))
Example #7
0
    def train(self, dataset_path, features=None, target=None, **kwargs):
        """Fit ``self._clf`` on the CSV file at ``dataset_path`` and log train RMSE.

        ``features`` and ``target`` are stored on the instance as ``_features``
        and ``_target`` (presumably read back by ``self._extract_xy`` -- confirm).
        Extra keyword arguments are accepted but not used in this method.
        """
        # Remember the requested columns on the instance
        self._features, self._target = features, target

        # Read the CSV into a dataframe, then split it into inputs and targets
        frame = pd.read_csv(dataset_path)
        inputs, targets = self._extract_xy(frame)

        # Encode categorical features
        inputs = self._encoding_categorical_type(inputs)

        self._clf.fit(inputs, targets)

        # Root mean square error on the training data
        train_preds = self._clf.predict(inputs)
        mse = mean_squared_error(targets, train_preds)
        rmse = np.sqrt(mse)
        logger.log('Train RMSE: {}'.format(rmse))
Example #8
0
    def train(self, dataset_uri):
        """Build and fit ``self._model`` on the image dataset at ``dataset_uri``.

        Image size, batch size and epoch count come from the ``image_size``,
        ``batch_size`` and ``epochs`` knobs. After fitting, the loss and accuracy
        on the training data are logged.
        """
        side = self._knobs.get('image_size')
        batch_size = self._knobs.get('batch_size')
        num_epochs = self._knobs.get('epochs')

        devices = device_lib.list_local_devices()
        logger.log('Available devices: {}'.format(str(devices)))

        # Define 2 plots: Loss against time, loss against epochs
        logger.define_loss_plot()
        logger.define_plot('Loss Over Time', ['loss'])

        samples = dataset_utils.load_dataset_of_image_files(dataset_uri, image_size=[side, side])
        num_classes = samples.classes

        # Split the (image, class) pairs and convert each side to an array
        image_seq, class_seq = zip(*[(image, image_class) for (image, image_class) in samples])
        images = np.asarray(image_seq)
        classes = np.asarray(class_seq)

        # Called once at the end of every training epoch
        epoch_callback = tf.keras.callbacks.LambdaCallback(on_epoch_end=self._on_train_epoch_end)

        with self._graph.as_default():
            # The model is built inside the graph but before entering the session
            self._model = self._build_model(num_classes)
            with self._sess.as_default():
                self._model.fit(
                    images,
                    classes,
                    verbose=0,
                    epochs=num_epochs,
                    batch_size=batch_size,
                    callbacks=[epoch_callback],
                )

                # Loss and accuracy on the training data
                loss, accuracy = self._model.evaluate(images, classes)
                logger.log('Train loss: {}'.format(loss))
                logger.log('Train accuracy: {}'.format(accuracy))
Example #9
0
    def _predict(self, dataset):
        """Predict a tag sequence for every sentence in ``dataset``.

        ``self._net`` is run over the dataset in batches sized by the
        ``batch_size`` knob. For each word position the predicted tag is the
        index with the highest score along dim 2 of the network output. Each
        sentence's predictions stop at the first word equal to the padding
        index ``len(self._word_dict)``.

        Returns:
            A list with one entry per sentence, in batch order; each entry is
            the list of predicted tag indices for that sentence.
        """
        N = self._knobs.get('batch_size')
        net = self._net
        B = math.ceil(len(dataset) / N)  # No. of batches
        # Word index marking padding: one past the last index of the word dict
        null_word = len(self._word_dict)

        Tensor = torch.LongTensor
        if torch.cuda.is_available():
            logger.log('Using CUDA...')
            net = net.cuda()
            Tensor = torch.cuda.LongTensor

        sents_pred_tags = []

        # Inference only: disable autograd so no computation graph is built or
        # kept alive for each batch.
        with torch.no_grad():
            for i in range(B):
                # Extract batch from dataset
                (words_tsr, _) = self._prepare_batch(dataset,
                                                     i * N,
                                                     i * N + N,
                                                     Tensor,
                                                     has_tags=False)

                # Forward propagate batch through model
                probs_tsr = net(words_tsr)

                # Pick the highest-scoring tag index for every word position
                _, preds_tsr = torch.max(probs_tsr, dim=2)

                # Populate predictions, one list per sentence
                for (sent_preds_tsr, sent_words_tsr) in zip(preds_tsr, words_tsr):
                    sent_pred_tags = []

                    for (pred, word) in zip(sent_preds_tsr, sent_words_tsr):
                        # Stop at the first padding word
                        if word.item() == null_word:
                            break
                        sent_pred_tags.append(pred.item())

                    sents_pred_tags.append(sent_pred_tags)

        return sents_pred_tags
Example #10
0
    def train(self, dataset_path, features=None, target=None):
        """Build and fit a classifier on the tabular dataset at ``dataset_path``.

        When ``features`` is None every column except the last is used as input;
        when ``target`` is None the last column is used as the target. The
        fitted classifier is stored in ``self._clf`` and its score on the
        training data is logged.
        """
        table = dataset_utils.load_dataset_of_tabular(dataset_path).data

        # Fall back to positional column selection when no names are given
        X = table.iloc[:, :-1] if features is None else table[features]
        y = table.iloc[:, -1] if target is None else table[target]

        # Encode categorical features
        X = self._category_encoding_type(X, y)

        # Number of distinct target values
        n_classes = y.unique().size

        self._clf = self._build_classifier(
            self.n_estimators,
            self.min_child_weight,
            self.max_depth,
            self.gamma,
            self.subsample,
            self.colsample_bytree,
            n_classes,
        )
        self._clf.fit(X, y)

        # Score the fitted classifier on the same data it was trained on
        train_score = self._clf.score(X, y)
        logger.log('Train accuracy: {}'.format(train_score))
Example #11
0
    def train(self, dataset_uri):
        """Train on the corpus at ``dataset_uri`` and log accuracy on that same corpus.

        Stores the word dictionary, tag count, trained network and optimizer on
        the instance as ``_word_dict``, ``_tag_count``, ``_net`` and
        ``_optimizer``.
        """
        corpus = dataset_utils.load_dataset_of_corpus(dataset_uri)
        self._word_dict = self._extract_word_dict(corpus)
        self._tag_count = corpus.tag_num_classes[0]

        n_words = len(self._word_dict)
        logger.log('No. of unique words: {}'.format(n_words))
        logger.log('No. of tags: {}'.format(self._tag_count))

        # Train first; prediction relies on the stored network
        trained = self._train(corpus)
        (self._net, self._optimizer) = trained

        predicted_tags = self._predict(corpus)
        train_acc = self._compute_accuracy(corpus, predicted_tags)

        logger.log('Train accuracy: {}'.format(train_acc))