def test_parser():
    '''Check CLI flag parsing and the mapping of format flags to output_format.'''
    target_url = 'https://www.example.org'

    def parse(argv):
        # sys.argv is only patched while the arguments are being parsed
        with patch.object(sys, 'argv', argv):
            return cli.parse_args(argv)

    # bundled short flags combined with long options
    parsed = parse(['', '-fv', '--xmltei', '--notables', '-u', target_url])
    assert parsed.fast is True
    assert parsed.verbose is True
    assert parsed.notables is False
    assert parsed.xmltei is True
    assert parsed.URL == target_url
    parsed = cli.map_args(parsed)
    assert parsed.output_format == 'xmltei'

    # output format given explicitly through -out
    parsed = parse(['', '-out', 'csv', '-u', target_url])
    assert parsed.fast is False
    assert parsed.verbose is False
    assert parsed.output_format == 'csv'

    # test args mapping: each format flag should end up in output_format
    parsed = cli.map_args(parse(['', '--xml']))
    assert parsed.output_format == 'xml'

    parsed.xml, parsed.csv = False, True
    parsed = cli.map_args(parsed)
    assert parsed.output_format == 'csv'

    parsed.csv, parsed.json = False, True
    parsed = cli.map_args(parsed)
    assert parsed.output_format == 'json'
def test_parser():
    '''Test argument parsing for the command-line interface.

    Covers flag parsing, the mapping of format flags onto
    ``output_format``, blacklist handling through ``cli.process_args``
    and the ``--url-filter`` option.
    '''
    testargs = [
        '', '-fv', '--xmltei', '--notables', '-u', 'https://www.example.org'
    ]
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.fast is True
    assert args.verbose is True
    assert args.notables is False
    assert args.xmltei is True
    assert args.URL == 'https://www.example.org'
    args = cli.map_args(args)
    assert args.output_format == 'xmltei'

    testargs = ['', '-out', 'csv', '-u', 'https://www.example.org']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.fast is False
    assert args.verbose is False
    assert args.output_format == 'csv'

    # test args mapping
    testargs = ['', '--xml']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    args = cli.map_args(args)
    assert args.output_format == 'xml'
    args.xml, args.csv = False, True
    args = cli.map_args(args)
    assert args.output_format == 'csv'
    args.csv, args.json = False, True
    args = cli.map_args(args)
    assert args.output_format == 'json'

    # process_args: the blacklist path is handed over as a string and the
    # attribute is expected to hold two entries afterwards
    args.inputdir = '/dev/null'
    args.verbose = True
    args.blacklist = os.path.join(TEST_DIR, 'resources/list-discard.txt')
    cli.process_args(args)
    assert len(args.blacklist) == 2

    # filter
    testargs = [
        '', '-i', 'resources/list-discard.txt', '--url-filter', 'test1', 'test2'
    ]
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.inputfile == 'resources/list-discard.txt'
    assert args.url_filter == ['test1', 'test2']

    resources_dir = os.path.join(TEST_DIR, 'resources')
    args.inputfile = os.path.join(resources_dir, 'list-discard.txt')
    # this must be an assignment: a bare `==` comparison would be evaluated
    # and thrown away, leaving the blacklist unset for the call below
    args.blacklist = os.path.join(resources_dir, 'list-discard.txt')
    f = io.StringIO()
    with redirect_stdout(f):
        cli.process_args(args)
    # nothing should be written to stdout during processing
    assert len(f.getvalue()) == 0
def test_sysoutput():
    '''Check output paths and file writing as driven by CLI arguments.'''

    def parse(argv):
        # sys.argv is only patched while the arguments are being parsed
        with patch.object(sys, 'argv', argv):
            return cli.parse_args(argv)

    # output directory expected to be reported as unusable
    opts = parse(['', '--csv', '-o', '/root/forbidden/'])
    out_path, out_dir = cli_utils.determine_output_path(
        opts, opts.outputdir, ''
    )
    assert len(out_path) >= 10 and out_path.endswith('.csv')
    assert out_dir == '/root/forbidden/'
    assert cli_utils.check_outputdir_status(opts.outputdir) is False

    # output directory expected to be reported as usable
    opts = parse(['', '--xml', '-o', '/tmp/you-touch-my-tralala'])
    assert cli_utils.check_outputdir_status(opts.outputdir) is True

    # test fileslug for name
    out_path, out_dir = cli_utils.determine_output_path(
        opts, opts.outputdir, '', new_filename='AAZZ'
    )
    assert out_path.endswith('AAZZ.xml')

    # test json output: flags are switched on the same namespace object,
    # which is also the one passed to determine_output_path below
    opts.xml, opts.json = False, True
    cli.map_args(opts)
    json_path, _ = cli_utils.determine_output_path(
        opts, opts.outputdir, '', new_filename='AAZZ'
    )
    assert json_path.endswith('AAZZ.json')

    # test directory counter
    assert cli_utils.determine_counter_dir('testdir', 0) == 'testdir/1'

    # test file writing
    opts = parse(['', '--csv', '-o', '/dev/null/'])
    cli_utils.write_result('DADIDA', opts)

    # process with no counter
    outcome = cli_utils.process_result(
        'DADIDA', opts, None, None, DEFAULT_CONFIG
    )
    assert outcome is None

    # test keeping dir structure
    opts = parse(['', '-i', 'myinputdir/', '-o', 'test/', '--keep-dirs'])
    out_path, out_dir = cli_utils.determine_output_path(
        opts, 'testfile.txt', ''
    )
    assert out_path == 'test/testfile.txt'

    # test hash as output file name
    assert opts.hash_as_name is False
    opts.hash_as_name = True
    assert opts.keep_dirs is True
    opts.keep_dirs = False
    out_path, out_dir = cli_utils.determine_output_path(
        opts, 'testfile.txt', ''
    )
    assert out_path == 'test/2jmj7l5rSw0yVb-vlWAYkK-YBwk.txt'
def test_parser():
    '''Check CLI parsing, format mapping and blacklist processing.'''
    example_url = 'https://www.example.org'

    # bundled short flags combined with long options
    argv = ['', '-fv', '--xmltei', '--notables', '-u', example_url]
    with patch.object(sys, 'argv', argv):
        namespace = cli.parse_args(argv)
    assert namespace.fast is True
    assert namespace.verbose is True
    assert namespace.notables is False
    assert namespace.xmltei is True
    assert namespace.URL == example_url
    namespace = cli.map_args(namespace)
    assert namespace.output_format == 'xmltei'

    # output format given explicitly through -out
    argv = ['', '-out', 'csv', '-u', example_url]
    with patch.object(sys, 'argv', argv):
        namespace = cli.parse_args(argv)
    assert namespace.fast is False
    assert namespace.verbose is False
    assert namespace.output_format == 'csv'

    # test args mapping
    argv = ['', '--xml']
    with patch.object(sys, 'argv', argv):
        namespace = cli.parse_args(argv)
    namespace = cli.map_args(namespace)
    assert namespace.output_format == 'xml'

    namespace.xml, namespace.csv = False, True
    namespace = cli.map_args(namespace)
    assert namespace.output_format == 'csv'

    namespace.csv, namespace.json = False, True
    namespace = cli.map_args(namespace)
    assert namespace.output_format == 'json'

    # process_args: the blacklist is given as a file path and is checked
    # for its number of entries once processing is done
    namespace.inputdir = '/dev/null'
    namespace.verbose = True
    namespace.blacklist = os.path.join(TEST_DIR, 'resources/list-discard.txt')
    cli.process_args(namespace)
    assert len(namespace.blacklist) == 4
def test_parser():
    '''Test argument parsing for the command-line interface.

    Covers flag parsing (including the verbosity counter and the
    ``--notables``/``--no-tables`` pair), the mapping of format flags
    onto ``output_format``, blacklist handling through
    ``cli.process_args``, the ``--url-filter`` option and ``--version``.
    '''
    testargs = [
        '', '-fvv', '--xmltei', '--notables', '-u', 'https://www.example.org'
    ]
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.fast is True
    assert args.verbose == 2
    assert args.notables is False and args.no_tables is False
    assert args.xmltei is True
    assert args.URL == 'https://www.example.org'
    args = cli.map_args(args)
    assert args.output_format == 'xmltei'

    testargs = [
        '', '-out', 'csv', '--no-tables', '-u', 'https://www.example.org'
    ]
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.fast is False
    assert args.verbose == 0
    assert args.output_format == 'csv'
    assert args.no_tables is False

    # test args mapping
    testargs = ['', '--xml', '--nocomments', '--precision', '--recall']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    args = cli.map_args(args)
    assert args.output_format == 'xml' and args.no_comments is False
    # combination possible (?)
    assert args.precision is True and args.recall is True
    args.xml, args.csv = False, True
    args = cli.map_args(args)
    assert args.output_format == 'csv'
    args.csv, args.json = False, True
    args = cli.map_args(args)
    assert args.output_format == 'json'

    testargs = ['', '--with-metadata']
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    args = cli.map_args(args)
    assert args.only_with_metadata is True

    # process_args: the blacklist path is handed over as a string and the
    # attribute is expected to hold two entries afterwards
    args.inputdir = '/dev/null'
    args.verbose = 1
    args.blacklist = os.path.join(RESOURCES_DIR, 'list-discard.txt')
    cli.process_args(args)
    assert len(args.blacklist) == 2

    # filter
    testargs = [
        '', '-i', 'resources/list-discard.txt', '--url-filter', 'test1', 'test2'
    ]
    with patch.object(sys, 'argv', testargs):
        args = cli.parse_args(testargs)
    assert args.inputfile == 'resources/list-discard.txt'
    assert args.url_filter == ['test1', 'test2']
    args.inputfile = os.path.join(RESOURCES_DIR, 'list-discard.txt')
    args.blacklist = os.path.join(RESOURCES_DIR, 'list-discard.txt')
    f = io.StringIO()
    with redirect_stdout(f):
        cli.process_args(args)
    # nothing should be written to stdout during processing
    assert len(f.getvalue()) == 0

    # version: parsing is expected to exit with code 0; the buffer was
    # verified to be empty above, so it only holds the version banner
    testargs = ['', '--version']
    with pytest.raises(SystemExit) as e, redirect_stdout(f):
        with patch.object(sys, 'argv', testargs):
            cli.parse_args(testargs)
    assert e.type is SystemExit
    assert e.value.code == 0
    # every version component may have several digits (e.g. 1.10.0), so
    # none of them is restricted to a single character
    assert re.match(
        r'Trafilatura [0-9]+\.[0-9]+\.[0-9]+ - Python [0-9]+\.[0-9]+\.[0-9]+',
        f.getvalue()
    )