def get(self, threshold):
    """Count words across every scanned text file and return the result.

    Builds a directory scanner, an interesting-service, a document parser,
    a text repository and a word-counting service, then feeds each sentence
    of each line of each scanned file to the counting service.

    Args:
        threshold: Forwarded unchanged as the third argument of
            ``Services.WordCountingService``; its meaning is defined there.

    Returns:
        Whatever ``get_word_count()`` of the counting service returns.
    """
    # NOTE(review): ``self`` is not used in this body.
    directory_scanner = Services.DirectoryScanner(config.DIRECTORIES_TO_SCAN)
    weighting = Services.InterestingService(config.DEFAULT_INTERESTING_WEIGHT)
    parser = Services.DocumentParser()
    text_repo = Repositories.TxtRepository()
    word_counter = Services.WordCountingService(parser, weighting, threshold)

    for path in directory_scanner.scan_files():
        # Lazily flatten line -> sentence so each sentence is handed to the
        # counter as soon as it is produced.
        sentences = (
            sentence
            for line in text_repo.read_file(path)
            for sentence in parser.split_to_sentences(line)
        )
        for sentence in sentences:
            # The originating file is passed alongside every sentence.
            word_counter.populate(sentence, path)

    return word_counter.get_word_count()
def main():
    """Scan for new files, validate each one, and print a report.

    For every ``(file, company)`` pair yielded by the scanner's
    ``scan_new_files()``, reads the file's metadata and headers from the CSV
    repository, derives the statement type, checks the header structure and
    the file's directory, and collects an ``Entities.ScannedFileResult``.
    Afterwards prints the number of collected results followed by one
    multi-line summary per result.
    """
    directory_scanner = Services.DirectoryScanner(
        config.DIRECTORIES_TO_SCAN, Services.CompanyRegistry()
    )
    csv_repo = Repositories.CsvRepository()
    validator = Services.FileValidator()

    results = []
    for path, company in directory_scanner.scan_new_files():
        metadata = csv_repo.get_metadata(path)
        headers = csv_repo.get_headers(path)
        statement_type = validator.get_statement_type(metadata)
        structure_ok = validator.is_file_structure_valid(headers)
        # statement_type is indexable: element 0 drives the directory check,
        # element 1 is what the report prints below.
        in_good_dir = validator.is_file_in_good_dir(statement_type[0], path)
        results.append(
            Entities.ScannedFileResult(
                path, statement_type, company, in_good_dir, structure_ok
            )
        )

    print(f'new files of unseen companies: {len(results)}')
    for result in results:
        # Each summary starts with a blank line to separate it from the
        # previous output.
        summary = (
            f'\n'
            f'company: {result.company}\n'
            f'file path: {result.path}\n'
            f'statement type: {result.statement_type[1]}\n'
            f'is file in good dir: {result.is_file_in_good_dir}\n'
            f'is file valid: {result.is_valid}'
        )
        print(summary)