def predict(self, test_input, input_type, test_case_count=25):
    # Assumes `logging` and `numpy as np` are imported at module level.
    normalize = Normalize()
    if input_type == 'RANDOM_INPUT':
        # Caller-supplied questions: predict and log the tags only.
        input_count = 0
        for question in test_input:
            input_count += 1
            question_ = normalize.normalize(question)
            logging.debug('Test Case No.{}: {}'.format(
                input_count, str(question)))
            logging.debug('-' * (len(question) + 16))
            logging.debug('Predicted Tags: {}'.format(
                self.tag_predictor(question_)))
        logging.debug('')
    else:
        # Held-out test set: sample questions at random and log each
        # prediction next to the ground-truth tags.
        test_idx = np.random.randint(len(test_input), size=test_case_count)
        logging.debug("Predicted vs Ground Truth for {} sample(s)".format(
            test_case_count))
        logging.debug('-' * 50)
        logging.debug('')
        input_count = 0
        for test_case in test_idx:
            input_count += 1
            question = str(test_input[test_case])
            logging.debug('Test Case No.{}: {}'.format(
                input_count, question))
            logging.debug('-' * 100)
            logging.debug("Question ID: {}".format(test_case))
            logging.debug('Predicted: ' + str(
                self.tag_predictor(normalize.normalize_(question))))
            logging.debug('Ground Truth: ' + str(
                self._tag_encoder.inverse_transform(
                    np.array([self._y_test[test_case]]))))
            logging.debug('\n')
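# A hedged usage sketch for the method above. `StackOverflowTagger`,
# `test_questions`, and the 'TEST_SET' label are illustrative assumptions,
# not names from the source. Predictions are emitted via logging.debug, so
# the logger must be configured first:
import logging
logging.basicConfig(level=logging.DEBUG)

model = StackOverflowTagger()  # hypothetical class exposing predict()
model.predict(['How do I merge two dicts in Python?'],
              input_type='RANDOM_INPUT')
# Any other input_type samples test_case_count questions at random and logs
# predictions next to the ground-truth tags from self._y_test:
model.predict(test_questions, input_type='TEST_SET', test_case_count=10)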
Example No. 2
    def input(self):
        # Read the feature matrix: one row per line, 10 space-separated values.
        with open('F:\\data\\ml\\2\\page_blocks_test_feature.txt', 'r') as fin:
            for row, line in enumerate(fin):
                self.matx[row][0:10] = line.strip('\n').split(' ')
        Normalize.normalize(self.matx)

        # Read the labels: the class is the first character of each line.
        with open('F:\\data\\ml\\2\\page_blocks_test_label.txt', 'r') as fin:
            for row, line in enumerate(fin):
                self.label[row] = line.strip('\n')[0]
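# The Normalize helper called above is not included in this listing. As a
# rough, standalone sketch (an assumption, not the original implementation),
# per-column min-max scaling of the feature matrix could look like this:
import numpy as np

class Normalize:
    @staticmethod
    def normalize(matx):
        # Scale each column of a float ndarray into [0, 1] in place.
        col_min = matx.min(axis=0)
        col_range = matx.max(axis=0) - col_min
        col_range[col_range == 0] = 1.0  # guard against constant columns
        matx -= col_min
        matx /= col_range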
    def predict(self, test_input, custom_input, test_case_count):
        normalize = Normalize()
        if custom_input:
            # Caller-supplied questions: predict tags and collect the results.
            input_count = 0
            prediction_list = []
            for question in test_input:
                input_count += 1
                question_ = normalize.normalize(question)
                logging.debug('-' * (len(question) + 16))
                logging.debug('Test Case No.{}: {}'.format(
                    input_count, str(question)))
                predicted_tag = self.tag_predictor(question_)
                logging.debug('Predicted Tags: {}'.format(predicted_tag))
                prediction_list.append({
                    'que_no': input_count,
                    'questions': str(question),
                    'predicted_tags': predicted_tag
                })

            logging.debug('')
            return prediction_list

        else:
            # Held-out test set: sample random questions, predict, and score
            # each prediction against the ground-truth tags.
            test_idx = np.random.randint(len(test_input), size=test_case_count)
            logging.debug("Predicted vs Ground Truth for {} sample(s)".format(
                test_case_count))
            logging.debug('-' * 50)
            logging.debug('')
            input_count = 0
            input_predicted_list = []
            prediction_score = 0
            predicted_tag_list = []
            prediction_list = []
            for test_case in test_idx:
                input_count += 1
                question = str(test_input[test_case])
                logging.debug('')
                logging.debug('-' * 100)
                logging.debug('Test Case No.{}:'.format(input_count))
                logging.debug("Question ID: {}".format(test_case))
                logging.debug('Question: {}'.format(question))
                predicted_tag = self.tag_predictor(
                    normalize.normalize_(question))
                predicted_tag_list.append(predicted_tag)
                ground_truth = self._tag_encoder.inverse_transform(
                    np.array([self._y_test[test_case]]))
                # Score the prediction: a test case counts as correct at most
                # once, when any predicted tag matches a ground-truth tag
                # exactly or by prefix in either direction.
                score = 0
                ground_truth_ = [*ground_truth[0]]

                matched = False
                for tag_group in predicted_tag:
                    for tag in [*tag_group]:
                        if len(tag) == 0:
                            continue
                        if tag in ground_truth_:
                            matched = True
                        else:
                            matched = any(
                                (gt_tag.startswith(tag)
                                 or tag.startswith(gt_tag)) and len(gt_tag) > 0
                                for gt_tag in ground_truth_)
                        if matched:
                            break
                    if matched:
                        break
                if matched:
                    score = 1
                    prediction_score += 1

                prediction_current = {
                    'que_no': input_count,
                    'questions': question,
                    'ground_truth': str(ground_truth),
                    'predicted_tags': str(predicted_tag)
                }
                prediction_list.append(prediction_current)

                # Record a summary row: [case no., ground truth, prediction, score].
                input_predicted_list.append(
                    [input_count, ground_truth, predicted_tag, score])

                # log the ground truth & prediction
                logging.debug('Predicted: ' + str(predicted_tag))
                logging.debug('Ground Truth: ' + str(ground_truth))
                logging.debug('\n')

            # Fraction of sampled test cases with at least one matching tag.
            accuracy = prediction_score / input_count
            self._accuracy = accuracy
            return prediction_list
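# A hedged usage sketch for the second predict() above; `model` and the
# question list are illustrative assumptions, not names from the source:
predictions = model.predict(test_input=['How do I reverse a list in Python?'],
                            custom_input=True, test_case_count=0)
for row in predictions:
    print(row['que_no'], row['questions'], row['predicted_tags'])
# With custom_input=False, test_input is treated as the held-out test set;
# the method scores each sampled case and stores the fraction of matched
# cases in self._accuracy.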