Esempio n. 1
0
    def test_config_gen(self):
        """Check that config.gen reports the configured year spans.

        config.spanYears is set to two arbitrary [lower, upper] pairs, then
        config.gen is called twice: once with a named user (the website case)
        and once with a single blank space as the user (per the original
        comment, that triggers the local, scripts-only case). Both results
        must expose the pairs under CONFIG['spans'][<span name>]['years'].
        """
        # Arbitrary values to retrieve from the generated dict.
        config.spanYears = [[100, 500], [200, 600]]

        default_headers = [
            'UT', 'AU', 'DE', 'ID', 'TI', 'WC', 'CR', 'C1', 'PY', 'J9', 'VL',
            'BP', 'DI', 'PT', 'DT', 'TC'
        ]
        expected_years = (
            ('span_name_0', [100, 500]),
            ('span_name_1', [200, 600]),
        )

        # First an arbitrary website user, then the blank-space user.
        for user in ('******', ' '):
            CONFIG = config.gen(user, default_headers)
            for span_name, years in expected_years:
                self.assertEqual(years, CONFIG['spans'][span_name]['years'])
Esempio n. 2
0
# Turn every "lower-upper" bound argument into an [lower, upper] pair of ints.
# Splitting on '-' retains the lowerbound-upperbound ordering of the string.
# (The loop variable is deliberately not called `range`, which would shadow
# the builtin for the rest of the script.)
spanYears = [
    list(map(int, bound.split("-"))) for bound in vars(args)['bound']
]

user = args.user
print(user)

# Run all scripts.
# Each stage module is imported right before it is used and receives the
# shared CONFIG as a module attribute before its run() is called.
import config
config.spanYears = spanYears
CONFIG = config.gen(user, str(args.headers).split('-'))

print("\nMERGING CORPUS\n")
import merging_corpus
merging_corpus.CONFIG = CONFIG
merging_corpus.run()

print("\nPARSE AND GROUP\n")
import parse_and_group
parse_and_group.CONFIG = CONFIG
parse_and_group.run()

print("\nCORPUS PARSED OVERVIEW\n")
import corpus_parsed_overview
corpus_parsed_overview.CONFIG = CONFIG
corpus_parsed_overview.run()
Esempio n. 3
0
 def test_config_gen(self):
     """Check that config.gen() reports the year spans set on config.spanYears.

     Two arbitrary [lower, upper] pairs are assigned to config.spanYears and
     must come back under CONFIG['spans'][<span name>]['years'].
     """
     # Arbitrary values to retrieve from the generated dict.
     config.spanYears = [[100, 500], [200, 600]]
     CONFIG = config.gen()

     expected_years = (
         ('span_name_0', [100, 500]),
         ('span_name_1', [200, 600]),
     )
     for span_name, years in expected_years:
         self.assertEqual(years, CONFIG['spans'][span_name]['years'])
Esempio n. 4
0
                    nargs='+',
                    metavar='bound',
                    help='Year value',
                    default={"1900-1999", "2000-2018"})
args = parser.parse_args()

# Turn every "lower-upper" bound argument into an [lower, upper] pair of ints.
# Splitting on '-' retains the lowerbound-upperbound ordering of the string.
# (The loop variable is deliberately not called `range`, which would shadow
# the builtin for the rest of the script.)
# NOTE(review): the default for this argument is written as a set literal,
# so the order of the default spans is not guaranteed -- confirm whether a
# list was intended.
spanYears = [
    list(map(int, bound.split("-"))) for bound in vars(args)['bound']
]

# Each stage module is imported right before it is used and receives the
# shared CONFIG as a module attribute before its run() is called.
import config
config.spanYears = spanYears
CONFIG = config.gen()

print("\nMERGING CORPUS\n")
import merging_corpus
merging_corpus.CONFIG = CONFIG
merging_corpus.run()

print("\nPARSE AND GROUP\n")
import parse_and_group
parse_and_group.CONFIG = CONFIG
parse_and_group.run()

print("\nCORPUS PARSED OVERVIEW\n")
import corpus_parsed_overview
corpus_parsed_overview.CONFIG = CONFIG
corpus_parsed_overview.run()
Esempio n. 5
0
class TestParsers(TestCase):
    """
    These tests assess the ability of parsers.py to parse a test input and
    separate the information into different categories.

    Most of them go through wos_parser, which (per the original author's
    note) exercises all the units in the parsers.py file.

    The parser is run once for the whole class in setUpClass and its output
    is deleted again in tearDownClass, so the individual tests do not depend
    on the order in which they are executed.
    """

    base_dir = os.path.dirname(os.path.dirname(__file__))
    test_files_dir = os.path.join(base_dir, "tests_bibliotools/testFiles")
    parser_input_dir = os.path.join(
        base_dir, "tests_bibliotools/testFiles/parser_tests")
    parser_output_dir = os.path.join(
        base_dir, "tests_bibliotools/testFiles/parser_tests/test_output")
    desired_output_dir = os.path.join(
        base_dir, "tests_bibliotools/testFiles/parser_tests/desired_output")

    default_headers = [
        'UT', 'AU', 'DE', 'ID', 'TI', 'WC', 'CR', 'C1', 'PY', 'J9', 'VL',
        'BP', 'DI', 'PT', 'DT', 'TC',
    ]
    test_user = '******'

    # The in-memory parser configuration is still installed when the class is
    # defined; only the filesystem work has moved into the class fixtures.
    CONFIG = config.gen(test_user, default_headers)
    parsers.CONFIG = CONFIG
    parsers.initHeaders()

    @classmethod
    def setUpClass(cls):
        """Run wos_parser once on the test input into a fresh output folder."""
        # A previous run that crashed may have left the folder behind, which
        # would make os.mkdir fail.
        cls._remove_dat_dir(cls.parser_output_dir)
        os.mkdir(cls.parser_output_dir)
        parsers.wos_parser(cls.parser_input_dir, cls.parser_output_dir, False)

    @classmethod
    def tearDownClass(cls):
        """Delete the parser output once every test in the class has run."""
        cls._remove_dat_dir(cls.parser_output_dir)

    @staticmethod
    def _remove_dat_dir(directory):
        """Delete all .dat files in `directory`, then the directory itself.

        Does nothing when the directory does not exist. os.rmdir still raises
        if anything other than .dat files is left inside.
        """
        if not os.path.isdir(directory):
            return
        for dat_path in glob.glob("%s/*.dat" % directory):
            os.remove(dat_path)
        os.rmdir(directory)

    def _assert_matches_desired_output(self, filename):
        """Compare a generated file with its reference, ignoring newlines."""
        with open(os.path.join(self.parser_output_dir, filename), "r") as myfile:
            output = myfile.read().replace("\n", "")

        with open(os.path.join(self.desired_output_dir, filename), "r") as myfile:
            desired_output = myfile.read().replace("\n", "")

        self.assertEqual(output, desired_output)

    def test_parse_articles(self):
        """
        After parsing, the output is correct with respect to a known-correct
        base folder FOR ARTICLES.
        """
        self._assert_matches_desired_output("articles.dat")

    def test_parse_authors(self):
        """
        After parsing, the output is correct with respect to a known-correct
        base folder FOR AUTHORS.
        """
        self._assert_matches_desired_output("authors.dat")

    def test_parse_countries(self):
        """
        After parsing, the output is correct with respect to a known-correct
        base folder FOR COUNTRIES.
        """
        self._assert_matches_desired_output("countries.dat")

    def test_parse_article_keywords(self):
        """
        After parsing, the output is correct with respect to a known-correct
        base folder FOR ARTICLE KEYWORDS.
        """
        self._assert_matches_desired_output("article_keywords.dat")

    def test_parse_institutions(self):
        """
        After parsing, the output is correct with respect to a known-correct
        base folder FOR INSTITUTIONS.
        """
        self._assert_matches_desired_output("institutions.dat")

    def test_open_dat_files(self):
        """
        open_dat_files creates exactly nine .dat files in the folder it is
        given.
        """
        output_dir = os.path.join(
            self.base_dir,
            "tests_bibliotools/testFiles/test_open_dat_files_folder")
        # Start from a clean folder even if an earlier run was interrupted.
        self._remove_dat_dir(output_dir)
        os.makedirs(output_dir)
        try:
            open_files = parsers.open_dat_files(output_dir)

            for file_key in open_files:
                open_files[file_key].close()

            no_of_files_created = len(glob.glob("%s/*.dat" % output_dir))
        finally:
            # Clean up whether or not the steps above succeeded.
            self._remove_dat_dir(output_dir)

        self.assertEqual(no_of_files_created, 9)

    def test_all_txt_files_for_at_least_one_file(self):
        """
        all_txt_files returns at least one entry when there is one .txt file
        in the given folder.
        """
        directory = self.test_files_dir
        txt_path = os.path.join(directory, "all_txt_file.txt")
        # Create an empty .txt file for all_txt_files to discover.
        with open(txt_path, "w"):
            pass
        try:
            txt_file_count = len(parsers.all_txt_files(directory))
        finally:
            # Always remove the temporary file, even if the call above fails.
            os.remove(txt_path)

        self.assertEqual(True, txt_file_count >= 1)