def _classify(self):
    """Run a ``Classifier`` over this object's preprocessed contents.

    The classifier is built from ``self.preprocessed_contents``,
    ``self.max_pages`` and the title returned by
    ``self._website.get_title()``. After ``classify()`` has run, the
    result of ``is_webpage_topic_plural()`` is stored on
    ``self.webpage_topic_is_plural``.
    """
    # Gather the constructor inputs in the same order the call evaluates them.
    contents = self.preprocessed_contents
    page_limit = self.max_pages
    title = self._website.get_title()

    topic_classifier = Classifier(
        contents,
        max_pages=page_limit,
        webpage_title=title,
    )
    topic_classifier.classify()

    # Only the plurality flag is kept; the classifier itself is discarded.
    self.webpage_topic_is_plural = topic_classifier.is_webpage_topic_plural()
if __name__ == "__main__":
    # Script entry point: build the collaborators, generate a configuration
    # from the classified URL map, classify the unclassified URLs, then write
    # the classification and print a summary.

    # --- Collaborators ------------------------------------------------------
    file_handler = FileHandler()
    # NOTE(review): `configuration_file` is not defined in this block;
    # presumably it is assigned earlier in the module -- confirm.
    configuration_provider = ConfigurationProvider(configuration_file)
    tree_browser = HTMLTreeBrowser()
    specification_registry = SpecificationRegistry(
        configuration_provider,
        tree_browser,
    )
    content_downloader = WebPageContentDownloader()
    tree_builder = HTMLTreeBuilder()

    # --- Configuration generation from the classified input -----------------
    configuration_generator = ConfigurationGenerator(
        configuration_provider,
        specification_registry,
        content_downloader,
        tree_builder,
    )
    classified_input_name = (
        configuration_provider.get_classified_input_file_name()
    )
    url_map = file_handler.get_url_map(classified_input_name)
    configuration_generator.generate_configuration(url_map)

    # --- Classification of the unclassified input ---------------------------
    unclassified_input_name = (
        configuration_provider.get_unclassified_input_file_name()
    )
    url_map_to_classify = file_handler.get_url_map(unclassified_input_name)
    classifier = Classifier(
        configuration_provider,
        specification_registry,
        content_downloader,
        tree_builder,
    )
    classification = classifier.classify(url_map_to_classify.keys())

    # --- Output -------------------------------------------------------------
    output_name = configuration_provider.get_output_file_name()
    file_handler.write_classification(output_name, classification)
    SummaryPrinter().print_summary(classification, url_map_to_classify)
from classification.classifier import Classifier
from classification.text_processor import TextProcessor
from preprocess.stemmer import Stemmer
from preprocess.tokenizer import Tokenizer

# Interactive classification loop: each line read from standard input is
# passed to ``Classifier.classify`` and the result is printed.
text_processor = TextProcessor(Stemmer(), Tokenizer())
classifier = Classifier(text_processor)

while True:
    try:
        user_input = input()
    except EOFError:
        # input() raises EOFError once stdin is exhausted (Ctrl-D or the end
        # of piped input). Leave the loop instead of dying with a traceback.
        break
    print(classifier.classify(user_input))