def _convert_to_export(self):
    """Return the combined export frame, using the HDF cache when it exists.

    The cache file name is built from ``self.mp.preprocessing_name()`` and
    ``csv_extension()`` and resolved through ``cache_path``. If that file
    already exists, its ``'export'`` key is read and returned without
    rebuilding anything.

    Otherwise the labeled training data, the unlabeled training data and
    the testing data are each tagged with the boolean role columns
    ``use_for_classifier_training``, ``use_for_score_calculation`` and
    ``predict_sentiment`` plus an empty ``predicted_sentiment`` column,
    handed to ``self._post_process`` as a list, written to the cache file
    and returned.

    Returns:
        The object produced by ``self._post_process`` (or the cached
        object read back from the HDF file).
    """
    destination = cache_path('{}{}.hdf'.format(
        self.mp.preprocessing_name(), csv_extension()))
    if os.path.exists(destination):
        return pandas.read_hdf(destination, key='export')

    # Labeled training rows: used to fit the classifier only.
    training_data = self._training_data().copy()
    training_data['use_for_classifier_training'] = True
    training_data['use_for_score_calculation'] = False
    training_data['predict_sentiment'] = False
    training_data['predicted_sentiment'] = None

    # Unlabeled rows: no role flag is set and 'sentiment' is blanked.
    unlabeled_training_data = self._unlabeled_training_data().copy()
    unlabeled_training_data['use_for_classifier_training'] = False
    unlabeled_training_data['use_for_score_calculation'] = False
    unlabeled_training_data['sentiment'] = None
    unlabeled_training_data['predict_sentiment'] = False
    unlabeled_training_data['predicted_sentiment'] = None

    # NOTE(review): the original layout of this method was lost; only the
    # preprocessing call is assumed to be guarded by this check, because
    # `testing_data` is needed unconditionally below -- confirm.
    if not use_different_source():
        self._preprocess_test_data()

    # Copy before tagging, like the two frames above, so the frame returned
    # by `_testing_data()` is not modified in place.
    testing_data = self._testing_data().copy()
    testing_data['use_for_classifier_training'] = False
    testing_data['use_for_score_calculation'] = True
    testing_data['predict_sentiment'] = True
    testing_data['predicted_sentiment'] = None

    reviews = self._post_process(
        [training_data, unlabeled_training_data, testing_data])
    # `key` is passed by keyword: recent pandas releases deprecate positional
    # arguments to `to_hdf` other than the path.
    reviews.to_hdf(destination, key='export', mode='w')
    return reviews
def _convert_to_export(self):
    """Return the combined export frame, using the HDF cache when it exists.

    The cache file name is built from ``self.mp.preprocessing_name()`` and
    ``csv_extension()`` and resolved through ``cache_path``. If that file
    already exists, its ``'export'`` key is read and returned without
    rebuilding anything.

    Otherwise the labeled training data, the unlabeled training data and
    the testing data are each tagged with the boolean role columns
    ``use_for_classifier_training``, ``use_for_score_calculation`` and
    ``predict_sentiment`` plus an empty ``predicted_sentiment`` column,
    handed to ``self._post_process`` as a list, written to the cache file
    and returned.

    Returns:
        The object produced by ``self._post_process`` (or the cached
        object read back from the HDF file).
    """
    destination = cache_path('{}{}.hdf'.format(
        self.mp.preprocessing_name(), csv_extension()))
    if os.path.exists(destination):
        return pandas.read_hdf(destination, key='export')

    # Labeled training rows: used to fit the classifier only.
    training_data = self._training_data().copy()
    training_data['use_for_classifier_training'] = True
    training_data['use_for_score_calculation'] = False
    training_data['predict_sentiment'] = False
    training_data['predicted_sentiment'] = None

    # Unlabeled rows: no role flag is set and 'sentiment' is blanked.
    unlabeled_training_data = self._unlabeled_training_data().copy()
    unlabeled_training_data['use_for_classifier_training'] = False
    unlabeled_training_data['use_for_score_calculation'] = False
    unlabeled_training_data['sentiment'] = None
    unlabeled_training_data['predict_sentiment'] = False
    unlabeled_training_data['predicted_sentiment'] = None

    # NOTE(review): the original layout of this method was lost; only the
    # preprocessing call is assumed to be guarded by this check, because
    # `testing_data` is needed unconditionally below -- confirm.
    if not use_different_source():
        self._preprocess_test_data()

    # Copy before tagging, like the two frames above, so the frame returned
    # by `_testing_data()` is not modified in place.
    testing_data = self._testing_data().copy()
    testing_data['use_for_classifier_training'] = False
    testing_data['use_for_score_calculation'] = True
    testing_data['predict_sentiment'] = True
    testing_data['predicted_sentiment'] = None

    reviews = self._post_process(
        [training_data, unlabeled_training_data, testing_data])
    # `key` is passed by keyword: recent pandas releases deprecate positional
    # arguments to `to_hdf` other than the path.
    reviews.to_hdf(destination, key='export', mode='w')
    return reviews
def save_reviews_with_topics(self, reviews):
    """Write ``reviews`` to the HDF cache file under the ``'export'`` key.

    The destination is resolved through ``cache_path`` from
    ``self.mp.preprocessing_name()`` and ``csv_extension()``. The file is
    opened with ``mode='w'``, so an existing file at that path is replaced.

    Args:
        reviews: Object exposing ``to_hdf`` (presumably a pandas DataFrame
            -- confirm against callers).
    """
    destination = cache_path('{}{}.hdf'.format(
        self.mp.preprocessing_name(), csv_extension()))
    # `key` is passed by keyword: recent pandas releases deprecate positional
    # arguments to `to_hdf` other than the path.
    reviews.to_hdf(destination, key='export', mode='w')
def save_reviews_with_topics(self, reviews):
    """Write ``reviews`` to the HDF cache file under the ``'export'`` key.

    The destination is resolved through ``cache_path`` from
    ``self.mp.preprocessing_name()`` and ``csv_extension()``. The file is
    opened with ``mode='w'``, so an existing file at that path is replaced.

    Args:
        reviews: Object exposing ``to_hdf`` (presumably a pandas DataFrame
            -- confirm against callers).
    """
    destination = cache_path('{}{}.hdf'.format(
        self.mp.preprocessing_name(), csv_extension()))
    # `key` is passed by keyword: recent pandas releases deprecate positional
    # arguments to `to_hdf` other than the path.
    reviews.to_hdf(destination, key='export', mode='w')