def _preprocess_test_data(self):
        """Write the labelled ("cheat") copy of the test set if it is missing.

        The test data is loaded, a ``sentiment`` column is filled in by
        applying ``sentiment_from_id`` to every value of the ``id`` column,
        and the result is saved as CSV with a header row and without the
        index.  When the target file already exists, nothing is loaded or
        written.
        """
        target = data_path('cheatTestData{}'.format(csv_extension()))
        if not os.path.isfile(target):
            source_name = 'testData{}'.format(csv_extension())
            frame = self._load_data(source_name)
            # The label is derived purely from the review id.
            frame['sentiment'] = frame['id'].map(sentiment_from_id)
            frame.to_csv(target, index=False, header=True)
コード例 #2
0
    def _preprocess_test_data(self):
        """Generate the sentiment-annotated test CSV once.

        Does nothing when the output file is already present.  Otherwise
        the raw test data is loaded, each row's ``sentiment`` is computed
        from its ``id`` via ``sentiment_from_id``, and the frame is written
        out as CSV (header included, index omitted).
        """
        output_file = data_path('cheatTestData{}'.format(csv_extension()))
        already_built = os.path.isfile(output_file)
        if already_built:
            return

        raw_name = 'testData{}'.format(csv_extension())
        reviews = self._load_data(raw_name)
        review_ids = reviews['id']
        reviews['sentiment'] = review_ids.map(sentiment_from_id)
        reviews.to_csv(output_file, header=True, index=False)
    def _convert_to_export(self):
        """Build the combined review set for export, or load it from cache.

        If the HDF cache file for the current preprocessing name already
        exists, its ``'export'`` table is returned unchanged.  Otherwise the
        labelled training, unlabelled training and test subsets are each
        tagged with the bookkeeping columns listed below, handed to
        ``self._post_process`` and the result is written to the cache file
        before being returned.

        Bookkeeping columns added to every subset:
            use_for_classifier_training: True only for labelled training rows.
            use_for_score_calculation: True only for test rows.
            predict_sentiment: True only for test rows.
            predicted_sentiment: always None at this stage.

        The unlabelled subset additionally gets ``sentiment`` set to None.

        Returns:
            The cached table read by ``pandas.read_hdf``, or the object
            produced by ``self._post_process`` (presumably a pandas
            DataFrame, since ``to_hdf`` is called on it).
        """
        destination = cache_path('{}{}.hdf'.format(
            self.mp.preprocessing_name(), csv_extension()))
        if os.path.exists(destination):
            return pandas.read_hdf(destination, key='export')

        training_data = self._training_data().copy()
        training_data['use_for_classifier_training'] = True
        training_data['use_for_score_calculation'] = False
        training_data['predict_sentiment'] = False
        training_data['predicted_sentiment'] = None

        unlabeled_training_data = self._unlabeled_training_data().copy()
        unlabeled_training_data['use_for_classifier_training'] = False
        unlabeled_training_data['use_for_score_calculation'] = False
        unlabeled_training_data['sentiment'] = None
        unlabeled_training_data['predict_sentiment'] = False
        unlabeled_training_data['predicted_sentiment'] = None

        # The default test source reads the "cheat" CSV, which has to be
        # generated first; a different source supplies its own test data.
        if not use_different_source():
            self._preprocess_test_data()
        # Copy before tagging, like the other two subsets, so the frame
        # handed back by the loader is never modified in place.
        testing_data = self._testing_data().copy()
        testing_data['use_for_classifier_training'] = False
        testing_data['use_for_score_calculation'] = True
        testing_data['predict_sentiment'] = True
        testing_data['predicted_sentiment'] = None

        reviews = self._post_process(
            [training_data, unlabeled_training_data, testing_data])
        # ``key`` is passed by keyword: positional arguments after the path
        # are deprecated in ``DataFrame.to_hdf`` since pandas 2.1.
        reviews.to_hdf(destination, key='export', mode='w')
        return reviews
コード例 #4
0
    def _convert_to_export(self):
        """Return the merged review set for export, using the HDF cache.

        When the cache file named after the current preprocessing already
        exists, its ``'export'`` table is read and returned.  Otherwise the
        labelled training, unlabelled training and test subsets are loaded,
        annotated with bookkeeping columns, combined by
        ``self._post_process``, written to the cache file and returned.

        Bookkeeping columns added to every subset:
            use_for_classifier_training: True only for labelled training rows.
            use_for_score_calculation: True only for test rows.
            predict_sentiment: True only for test rows.
            predicted_sentiment: always None at this stage.

        The unlabelled subset additionally gets ``sentiment`` set to None.

        Returns:
            The cached table read by ``pandas.read_hdf``, or the object
            produced by ``self._post_process`` (presumably a pandas
            DataFrame, since ``to_hdf`` is called on it).
        """
        destination = cache_path('{}{}.hdf'.format(
            self.mp.preprocessing_name(), csv_extension()))
        if os.path.exists(destination):
            return pandas.read_hdf(destination, key='export')

        training_data = self._training_data().copy()
        training_data['use_for_classifier_training'] = True
        training_data['use_for_score_calculation'] = False
        training_data['predict_sentiment'] = False
        training_data['predicted_sentiment'] = None

        unlabeled_training_data = self._unlabeled_training_data().copy()
        unlabeled_training_data['use_for_classifier_training'] = False
        unlabeled_training_data['use_for_score_calculation'] = False
        unlabeled_training_data['sentiment'] = None
        unlabeled_training_data['predict_sentiment'] = False
        unlabeled_training_data['predicted_sentiment'] = None

        # The default test source reads the "cheat" CSV, so make sure it
        # has been generated before loading it.
        if not use_different_source():
            self._preprocess_test_data()
        # Work on a copy, consistent with the two subsets above, so that
        # tagging does not modify the frame returned by the loader.
        testing_data = self._testing_data().copy()
        testing_data['use_for_classifier_training'] = False
        testing_data['use_for_score_calculation'] = True
        testing_data['predict_sentiment'] = True
        testing_data['predicted_sentiment'] = None

        reviews = self._post_process(
            [training_data, unlabeled_training_data, testing_data])
        # Pass ``key`` by keyword; positional use after the path argument
        # is deprecated in ``DataFrame.to_hdf`` since pandas 2.1.
        reviews.to_hdf(destination, key='export', mode='w')
        return reviews
 def _testing_data(self):
     """Load the test set from whichever source is currently selected.

     With a different source enabled, everything registered under
     ``'testing'`` is loaded; otherwise the "cheat" test CSV is read.
     """
     if not use_different_source():
         cheat_name = 'cheatTestData{}'.format(csv_extension())
         return self._load_data(cheat_name)
     return self._load_all_data('testing')
 def _training_data(self):
     """Load the labelled training set from the selected source.

     With a different source enabled, everything registered under
     ``'training'`` is loaded; otherwise the labelled training CSV is read.
     """
     if not use_different_source():
         labeled_name = 'labeledTrainData{}'.format(csv_extension())
         return self._load_data(labeled_name)
     return self._load_all_data('training')
 def save_reviews_with_topics(self, reviews):
     """Overwrite the export cache file with ``reviews``.

     The file name is built from the current preprocessing name and the
     CSV extension, and the data is stored under the ``'export'`` key.
     ``mode='w'`` replaces any existing file.

     Args:
         reviews: Object exposing ``to_hdf`` (presumably a pandas
             DataFrame of reviews; confirm against callers).
     """
     destination = cache_path('{}{}.hdf'.format(
         self.mp.preprocessing_name(), csv_extension()))
     # ``key`` is passed by keyword: positional arguments after the path
     # are deprecated in ``DataFrame.to_hdf`` since pandas 2.1.
     reviews.to_hdf(destination, key='export', mode='w')
コード例 #8
0
 def _unlabeled_training_data(self):
     """Load the unlabelled training set from the selected source.

     With a different source enabled, everything registered under
     ``'unlabeled'`` is loaded; otherwise the unlabelled training CSV is
     read.
     """
     if not use_different_source():
         unlabeled_name = 'unlabeledTrainData{}'.format(csv_extension())
         return self._load_data(unlabeled_name)
     return self._load_all_data('unlabeled')
コード例 #9
0
 def _testing_data(self):
     """Return the test data for the active data source.

     Reads the "cheat" test CSV by default; when a different source is in
     use, loads all data registered under ``'testing'`` instead.
     """
     alternate_source = use_different_source()
     if alternate_source:
         return self._load_all_data('testing')

     file_name = 'cheatTestData{}'.format(csv_extension())
     return self._load_data(file_name)
コード例 #10
0
 def save_reviews_with_topics(self, reviews):
     """Write ``reviews`` to the export cache file, replacing its contents.

     The destination is the cache path for
     ``'<preprocessing name><csv extension>.hdf'`` and the data is stored
     under the ``'export'`` key; ``mode='w'`` truncates an existing file.

     Args:
         reviews: Object exposing ``to_hdf`` (presumably a pandas
             DataFrame of reviews; confirm against callers).
     """
     cache_name = '{}{}.hdf'.format(
         self.mp.preprocessing_name(), csv_extension())
     destination = cache_path(cache_name)
     # Pass ``key`` by keyword; positional use after the path argument is
     # deprecated in ``DataFrame.to_hdf`` since pandas 2.1.
     reviews.to_hdf(destination, key='export', mode='w')