# Command-line driver for xuwc: parse the count/context options, validate
# them, run XUWc.xuwc_main over the given files and print the reports.
p = optparse.OptionParser()
p.add_option("-a", "--count", dest="count_type")
p.add_option("-b", "--re_count", dest="count_regexp")
p.add_option("-c", "--context", dest="context_type")
p.add_option("-d", "--re_context", dest="context_regexp")
(options, args) = p.parse_args()

# At least an XPath and one file are required.  A missing argument is a usage
# error, so report it on stderr and exit non-zero (2 is the status optparse
# itself uses for usage errors) instead of signalling success with 0.
if len(args) < 2:
    sys.stderr.write(
        "Usage xuwc [ --count <count_type> | --re_count <count_regexp> ] "
        "[ --context <context_type> | --re_context <context_regexp> ] "
        "<xpath> <files>+\n"
    )
    sys.exit(2)

xpath = args[0]
file_paths = args[1:]

# Options that were not given stay None; the dict is handed to both
# xuwc_main and output_reports below.
wc_params = {
    'count_type': options.count_type,
    'count_regexp': options.count_regexp,
    'context_type': options.context_type,
    'context_regexp': options.context_regexp,
}

# The named and regexp forms of each option are mutually exclusive.
if (wc_params['count_type'] is not None
        and wc_params['count_regexp'] is not None):
    sys.stderr.write("Cannot specify both a count and re_count option.\n")
    sys.exit(-1)

if (wc_params['context_type'] is not None
        and wc_params['context_regexp'] is not None):
    sys.stderr.write("Cannot specify both a context and re_context option.\n")
    sys.exit(-1)

wc_reports = XUWc.xuwc_main(xpath, file_paths, wc_params)
XUWc.output_reports(wc_reports, wc_params)
def test_xuwc(self):
    """Exercise XUWc.xuwc_main with default, count-override and
    context-override parameters.

    Reads the fixture paths self.tei_data_path1, self.tei_data_path2 and
    self.ios_data_path1 (presumably assigned in setUp -- not visible here).

    Each case passes a four-key wc_params dict to xuwc_main and then checks
    both the returned reports (file path -> {context region id -> count})
    and the wc_params dict itself, which the test expects xuwc_main to have
    filled in with the effective context/count type.
    """
    tei_path1 = self.tei_data_path1
    tei_path2 = self.tei_data_path2
    ios_path1 = self.ios_data_path1

    def run_query(xpath, file_paths, count_type=None, count_regexp=None,
                  context_type=None, context_regexp=None):
        """Call XUWc.xuwc_main; return (wc_reports, wc_params).

        The params dict is returned as well because the assertions inspect
        it after the call.
        """
        wc_params = {
            'count_type': count_type,
            'count_regexp': count_regexp,
            'context_type': context_type,
            'context_regexp': context_regexp,
        }
        wc_reports = XUWc.xuwc_main(xpath, file_paths, wc_params)
        return wc_reports, wc_params

    def assert_reported_files(wc_reports, expected_paths):
        """Check that the reports are keyed by exactly expected_paths.

        Compared order-insensitively: indexing dict.keys() positionally
        depends on dict ordering, which is not guaranteed on Python 2.
        """
        reported_paths = list(wc_reports.keys())
        self.assertEqual(len(reported_paths), len(expected_paths))
        self.assertEqual(sorted(reported_paths), sorted(expected_paths))

    def assert_report(wc_report, wc_params, context_type, count_value,
                      counts, region_ids=None, count_key='count_type'):
        """Check one file's report and the effective parameters.

        counts      -- expected counts, one per context region.
        region_ids  -- when given, counts[i] must be stored under
                       region_ids[i]; when omitted the region ids are not
                       pinned and the counts are compared as a sorted list,
                       so the check does not depend on dict key order.
        count_key   -- which wc_params entry must equal count_value.
        """
        region_keys = list(wc_report.keys())
        self.assertEqual(len(region_keys), len(counts))
        self.assertEqual(wc_params['context_type'], context_type)
        if region_ids is None:
            actual_counts = sorted(wc_report[key] for key in region_keys)
            self.assertEqual(actual_counts, sorted(counts))
        else:
            for region_id, expected_count in zip(region_ids, counts):
                self.assertEqual(wc_report[region_id], expected_count)
        self.assertEqual(wc_params[count_key], count_value)

    # M 1.1: First test out some queries when I specify no options
    wc_reports, wc_params = run_query("//tei:section", [tei_path1])
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "file_path", "tei:section", [2])

    # M 1.2
    xpath = ("//tei:section/tei:subsection/"
             "tei:subsubsection[re:testsubtree('Globus','gi')]")
    wc_reports, wc_params = run_query(xpath, [tei_path1, tei_path2])
    assert_reported_files(wc_reports, [tei_path1, tei_path2])
    for file_path in (tei_path1, tei_path2):
        assert_report(wc_reports[file_path], wc_params,
                      "file_path", "tei:subsubsection", [1])

    # M 2.1 Override count (byte), M 2.3 (word), M 2.4 (character).
    # All three run the same XPath and context, differing only in the
    # builtin count type and the expected per-interface counts.
    ios_prefix = ios_path1.replace('.', '_')
    ios_region_ids = [
        ios_prefix + ".Loopback0",
        ios_prefix + ".GigabitEthernet4/2",
    ]
    builtin_cases = (
        ("builtin:byte", [186, 232]),
        ("builtin:word", [23, 27]),
        ("builtin:character", [186, 232]),
    )
    for count_type, counts in builtin_cases:
        wc_reports, wc_params = run_query(
            "//ios:interface", [ios_path1],
            count_type=count_type, context_type="ios:interface")
        assert_reported_files(wc_reports, [ios_path1])
        assert_report(wc_reports[ios_path1], wc_params,
                      "ios:interface", count_type, counts,
                      region_ids=ios_region_ids)

    # M 2.5 This one should generate an error
    # TODO: not exercised -- the original call was commented out.  Inputs:
    # xpath "//tei:section", files [tei_data_path1],
    # count_type "tei:subsection".

    # M 2.6
    wc_reports, wc_params = run_query(
        "//tei:section/tei:subsection/tei:subsubsection", [tei_path1],
        count_type="tei:subsection")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "file_path", "tei:subsection", [1])

    # M 3.1
    tei_prefix = tei_path1.replace('.', '_')
    wc_reports, wc_params = run_query(
        "//tei:section", [tei_path1],
        count_regexp=r"\w+", context_type="tei:section")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:section", r"\w+", [357, 45],
                  region_ids=[tei_prefix + ".1", tei_prefix + ".9"],
                  count_key='count_regexp')

    # M 4.1 context override
    wc_reports, wc_params = run_query(
        "//tei:section/tei:p", [tei_path1], context_type="tei:section")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:section", "tei:p", [5, 1])

    # M 4.2 should generate an error
    # TODO: not exercised -- the original call was commented out.  Inputs:
    # xpath "//tei:section/tei:p", files [tei_data_path1],
    # context_type "tei:subsection".

    # M 4.3 should generate an error
    # TODO: not exercised -- the original call was commented out.  Inputs:
    # xpath "//tei:section/tei:subsection", files [tei_data_path1],
    # context_type "tei:subsubsection".

    # M 4.4
    wc_reports, wc_params = run_query(
        "//tei:section/tei:p", [tei_path1], context_type="tei:p")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:p", "tei:p", [1] * 6)

    # M 4.5
    wc_reports, wc_params = run_query(
        "//tei:section/tei:subsection/tei:p", [tei_path1],
        context_type="tei:subsection")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:subsection", "tei:p", [4])

    # M 4.6
    wc_reports, wc_params = run_query(
        "//tei:section/tei:subsection/tei:p", [tei_path1],
        context_type="tei:section")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:section", "tei:p", [4])

    # M 4.7
    wc_reports, wc_params = run_query(
        "//tei:section/tei:p", [tei_path1], context_type="tei:section")
    assert_reported_files(wc_reports, [tei_path1])
    assert_report(wc_reports[tei_path1], wc_params,
                  "tei:section", "tei:p", [5, 1])

    # M 4.8 - should explode
    # NOTE(review): the call is unguarded, so an exception raised here is
    # reported as a test error rather than an expected outcome.  Wrap it in
    # assertRaises once the expected exception type is confirmed.
    wc_reports, wc_params = run_query(
        "//tei:section/tei:p", [tei_path1], context_regexp="builtin:line")