Esempio n. 1
0
    def get(self, threshold):
        """Count words across every file found in the configured directories.

        Each file returned by the directory scanner is read through a
        ``TxtRepository``, every line is split into sentences by a
        ``DocumentParser``, and each sentence is handed, together with the
        file it came from, to a ``WordCountingService``.

        Args:
            threshold: Forwarded unchanged to ``WordCountingService``; its
                exact meaning is defined there (presumably a cut-off on the
                counts -- confirm against that service).

        Returns:
            Whatever ``WordCountingService.get_word_count()`` returns once
            every sentence has been processed.
        """
        directory_scanner = Services.DirectoryScanner(config.DIRECTORIES_TO_SCAN)
        interest = Services.InterestingService(config.DEFAULT_INTERESTING_WEIGHT)
        parser = Services.DocumentParser()
        txt_repo = Repositories.TxtRepository()
        word_counter = Services.WordCountingService(parser, interest, threshold)

        # Lazy pipeline: sentences are produced one at a time, so the
        # scan/read/split/populate calls interleave exactly as nested loops
        # would.
        sentences_with_source = (
            (sentence, path)
            for path in directory_scanner.scan_files()
            for line in txt_repo.read_file(path)
            for sentence in parser.split_to_sentences(line)
        )
        for sentence, path in sentences_with_source:
            word_counter.populate(sentence, path)

        return word_counter.get_word_count()
Esempio n. 2
0
def main():
    """Scan for new files, validate each one, and print a report.

    For every ``(file, company)`` pair yielded by the directory scanner the
    file's metadata and headers are read through a ``CsvRepository`` and
    checked with a ``FileValidator``. The outcomes are collected as
    ``Entities.ScannedFileResult`` objects and then printed to stdout: first
    the number of results, then one multi-line entry per file.
    """
    scanner = Services.DirectoryScanner(config.DIRECTORIES_TO_SCAN, Services.CompanyRegistry())
    csv_repo = Repositories.CsvRepository()
    validator = Services.FileValidator()

    def inspect(path, company):
        """Run every check on one file and bundle the outcome."""
        metadata = csv_repo.get_metadata(path)
        headers = csv_repo.get_headers(path)
        statement_type = validator.get_statement_type(metadata)
        structure_ok = validator.is_file_structure_valid(headers)
        # statement_type is indexable: element 0 feeds the directory check,
        # element 1 is what the report prints below.
        in_good_dir = validator.is_file_in_good_dir(statement_type[0], path)
        return Entities.ScannedFileResult(path, statement_type, company, in_good_dir, structure_ok)

    results = [inspect(path, company) for path, company in scanner.scan_new_files()]

    print(f'new files of unseen companies: {len(results)}')
    for result in results:
        report = (f'\n'
                  f'company: {result.company}\n'
                  f'file path: {result.path}\n'
                  f'statement type: {result.statement_type[1]}\n'
                  f'is file in good dir: {result.is_file_in_good_dir}\n'
                  f'is file valid: {result.is_valid}')
        print(report)