Esempio n. 1
0
def test_name():
    """Check the name-redaction count for the ``name.txt`` fixture.

    Reads ``tests/test_files/name.txt``, POS-tags its tokens, hands both to
    ``main.redact_names`` and asserts that ``main.num_names`` records 22 for
    that file. The redacted text is then passed to ``main.write_redacted``
    so a copy is kept for reference.
    """
    fixture = "name.txt"
    name_file = 'tests/test_files/' + fixture
    output_dir = 'tests/test_files/redacted/'

    # Set up main's statistics for this file before any redaction runs.
    # NOTE(review): meaning of the ``0`` and ``None`` arguments is not
    # visible here -- confirm against main.init_stats.
    main.init_stats(name_file, 0, None)

    # Load the fixture text.
    text = main.get_file_contents(name_file)

    # Tokenize, then part-of-speech tag; redact_names takes the tagged tokens.
    tokens = WordPunctTokenizer().tokenize(text)
    tagged_tokens = nltk.pos_tag(tokens)

    # Redact.
    redacted = main.redact_names(text, tagged_tokens, name_file)

    # Expected number of names counted for this file.
    assert main.num_names[name_file] == 22

    # Create the output directory if it is missing, reporting it on stderr.
    if not os.path.isdir(output_dir):
        notice = ("Output directory did not exist...creating " +
                  output_dir + "/\n")
        sys.stderr.write(notice)
        os.makedirs(output_dir)

    # Write out the redacted test file for reference.
    main.write_redacted(redacted, name_file, output_dir)
Esempio n. 2
0
def test_concept():
    """Check the concept-redaction count for the ``concept.txt`` fixture.

    Reads ``tests/test_files/concept.txt``, runs ``main.redact_concept`` with
    the concept list ``["child"]`` and asserts that ``main.num_concept``
    records 12 for that file. The redacted text is then passed to
    ``main.write_redacted`` so a copy is kept for reference.

    No POS tagging is done here: ``main.redact_concept`` is called with only
    the raw text, the file name and the argument object.
    """
    filename = "concept.txt"
    concept_file = 'tests/test_files/' + filename
    output_dir = 'tests/test_files/redacted/'

    # Set up main's statistics for this file before any redaction runs.
    # NOTE(review): meaning of the ``0`` and ``None`` arguments is not
    # visible here -- confirm against main.init_stats.
    main.init_stats(concept_file, 0, None)

    # Get test file
    content = main.get_file_contents(concept_file)

    # Make required dot structure: wrap the dict in ``temp`` (defined
    # elsewhere in this file) before passing it to redact_concept.
    # See https://stackoverflow.com/questions/2352181/how-to-use-a-dot-to-access-members-of-dictionary
    args = temp({"concept": ["child"]})

    # Redact
    content = main.redact_concept(content, concept_file, args)

    # Expected number of concept words counted for this file
    assert (main.num_concept[concept_file] == 12)

    # Create path
    if (not os.path.isdir(output_dir)):
        sys.stderr.write("Output directory did not exist...creating " +
                         output_dir + "/\n")
        os.makedirs(output_dir)

    # Write out the redacted test file for reference
    main.write_redacted(content, concept_file, output_dir)
def test_address():
    """Check the address-redaction count for the ``addresses.txt`` fixture.

    Reads ``tests/test_files/addresses.txt``, runs ``main.redact_addresses``
    on it and asserts that ``main.num_addresses`` records 3 for that file.
    The redacted text is then passed to ``main.write_redacted`` so a copy is
    kept for reference.
    """
    fixture = "addresses.txt"
    address_file = 'tests/test_files/' + fixture
    output_dir = 'tests/test_files/redacted/'

    # Set up main's statistics for this file before any redaction runs.
    # NOTE(review): meaning of the ``0`` and ``None`` arguments is not
    # visible here -- confirm against main.init_stats.
    main.init_stats(address_file, 0, None)

    # Load the fixture text and redact it.
    original_text = main.get_file_contents(address_file)
    redacted = main.redact_addresses(original_text, address_file)

    # Three addresses in file.
    assert main.num_addresses[address_file] == 3

    # Create the output directory if it is missing, reporting it on stderr.
    if not os.path.isdir(output_dir):
        notice = ("Output directory did not exist...creating " +
                  output_dir + "/\n")
        sys.stderr.write(notice)
        os.makedirs(output_dir)

    # Write out the redacted test file for reference.
    main.write_redacted(redacted, address_file, output_dir)