def test_get_id_for_idx(self):
    """Check get_id_for_idx for every index into a three-part dotted id.

    The expectations below pin the id prefix returned for indices 0-2 and
    that an index past the last component yields None.
    """
    region_id = "file1.1.4"
    counter = XUWc()

    # (index, expected id) pairs, checked in order.
    expectations = (
        (0, "file1"),
        (1, "file1.1"),
        (2, "file1.1.4"),
        # not sure if this behavior is most useful
        (3, None),
    )
    for idx, expected_id in expectations:
        self.assertEqual(expected_id, counter.get_id_for_idx(idx, region_id))
def test_get_ids_for_idx(self):
    """Check get_ids_for_idx on three dotted region ids, index by index.

    The assertions pin the number of ids returned and their order for
    indices 0-2, and that an out-of-range index returns an empty result.
    """
    region_ids = ["file1.1.4", "file1.2.3", "file2.1.1"]
    counter = XUWc()

    # Index 0: the three inputs are expected to collapse to two ids.
    ids_at_0 = counter.get_ids_for_idx(0, region_ids)
    self.assertEqual(len(ids_at_0), 2)
    for pos, expected_id in enumerate(("file1", "file2")):
        self.assertEqual(ids_at_0[pos], expected_id)

    # Index 1: one id per input.
    ids_at_1 = counter.get_ids_for_idx(1, region_ids)
    self.assertEqual(len(ids_at_1), 3)
    for pos, expected_id in enumerate(("file1.1", "file1.2", "file2.1")):
        self.assertEqual(ids_at_1[pos], expected_id)

    # Index 2: the ids come back unchanged.
    ids_at_2 = counter.get_ids_for_idx(2, region_ids)
    self.assertEqual(len(ids_at_2), 3)
    for pos, original_id in enumerate(region_ids):
        self.assertEqual(original_id, ids_at_2[pos])

    # not sure if this behavior is most useful
    ids_at_3 = counter.get_ids_for_idx(3, region_ids)
    self.assertEqual(len(ids_at_3), 0)
def test_xuwc(self):
    """Run XUWc.create / get_counts through the numbered "M" scenarios.

    Every scenario builds a counter from an xupath, a list of input files,
    a count unit and a container unit, checks that the counter reports the
    units it was given, and then checks the per-container counts.
    """
    label_path_delimiter = ":"
    element_equality_fields = [
        CorpusElement.LABEL_PATH,
        CorpusElement.LANGUAGE_NAME_PATH,
        CorpusElement.TEXT,
    ]

    def counts_for(xupath, file_paths, count_unit, container_unit):
        """Create a counter, verify its units, and return its counts."""
        xuwc = XUWc.create(xupath, file_paths, element_equality_fields,
                           count_unit, container_unit, label_path_delimiter)
        self.assertEqual(xuwc.get_container_unit(), container_unit)
        self.assertEqual(xuwc.get_count_unit(), count_unit)
        return xuwc.get_counts()

    def only_key(counts):
        """Return the single key of *counts* after asserting there is one."""
        self.assertEqual(len(counts.keys()), 1)
        # list(...) keeps this working where keys() is a view, not a list.
        return list(counts.keys())[0]

    tei_file_key = os.path.basename(self.tei_data_path1)

    # M 1.1: First test out some queries where I specify no options
    counts = counts_for(self.tei_xupath1, [self.tei_data_path1],
                        TEIXMLGrammar.SECTION, BuiltinGrammar.FILE)
    self.assertEqual(counts[tei_file_key], 2)

    # M 1.2
    counts = counts_for(self.tei_xupath2,
                        [self.tei_data_path1, self.tei_data_path2],
                        TEIXMLGrammar.SUBSUBSECTION, BuiltinGrammar.FILE)
    # There is only one subsubsection with 'Globus' in it and it's in v1
    self.assertEqual(len(counts.keys()), 2)
    self.assertEqual(counts[tei_file_key], 1)

    # Keys for the two interfaces expected in the IOS data file.
    loopback_key = label_path_delimiter.join(
        ["router.v1.example", "Loopback0"])
    gigabit_key = label_path_delimiter.join(
        ["router.v1.example", "GigabitEthernet4/2"])

    # M 2.1 Override count
    counts = counts_for(self.ios_xupath1, [self.ios_data_path1],
                        BuiltinGrammar.BYTE, CiscoIOSGrammar.INTERFACE)
    self.assertEqual(len(counts.keys()), 2)
    self.assertEqual(counts[loopback_key], 186)
    self.assertEqual(counts[gigabit_key], 232)

    # M 2.3
    counts = counts_for(self.ios_xupath1, [self.ios_data_path1],
                        BuiltinGrammar.WORD, CiscoIOSGrammar.INTERFACE)
    self.assertEqual(counts[loopback_key], 23)
    self.assertEqual(counts[gigabit_key], 27)

    # M 2.4
    counts = counts_for(self.ios_xupath1, [self.ios_data_path1],
                        BuiltinGrammar.CHARACTER, CiscoIOSGrammar.INTERFACE)
    self.assertEqual(counts[loopback_key], 186)
    self.assertEqual(counts[gigabit_key], 232)

    # M 2.6 (disabled): this is a case that we don't yet implement, AND to
    # query the number of subsections that satisfy the path we need a
    # predicate. Intended check: xupath SECTION/SUBSECTION/SUBSUBSECTION on
    # tei_data_path1 with count unit SUBSECTION and container unit FILE
    # reports exactly one key whose count is 1.

    section_paragraph = "/".join(
        [TEIXMLGrammar.SECTION, TEIXMLGrammar.PARAGRAPH])
    subsection_paragraph = "/".join(
        [TEIXMLGrammar.SECTION, TEIXMLGrammar.SUBSECTION,
         TEIXMLGrammar.PARAGRAPH])
    introduction_key = label_path_delimiter.join(
        ["section.tei.v1.xml", "1 INTRODUCTION"])
    glossary_key = label_path_delimiter.join(
        ["section.tei.v1.xml", "9. Glossary"])

    # M 4.1
    counts = counts_for(section_paragraph, [self.tei_data_path1],
                        TEIXMLGrammar.PARAGRAPH, TEIXMLGrammar.SECTION)
    # There are two sections
    self.assertEqual(len(counts.keys()), 2)
    # should be 4 and not 5 since the paragraphs in subsubsection2 are the
    # same under our base equality function
    self.assertEqual(counts[introduction_key], 4)
    self.assertEqual(counts[glossary_key], 1)

    # M 4.4 (disabled): "I'm not sure what this test even means".
    # Intended check: xupath SECTION/PARAGRAPH on tei_data_path1 with both
    # count unit and container unit set to PARAGRAPH reports 6 keys.

    # M 4.5
    counts = counts_for(subsection_paragraph, [self.tei_data_path1],
                        TEIXMLGrammar.PARAGRAPH, TEIXMLGrammar.SUBSECTION)
    self.assertEqual(counts[only_key(counts)], 3)

    # M 4.6
    counts = counts_for(subsection_paragraph, [self.tei_data_path1],
                        TEIXMLGrammar.PARAGRAPH, TEIXMLGrammar.SECTION)
    # There is only 1 section that contains a subsection, and there are
    # 3 paragraphs contained within the subsection
    self.assertEqual(counts[only_key(counts)], 3)

    # M 4.7
    counts = counts_for(section_paragraph, [self.tei_data_path1],
                        TEIXMLGrammar.PARAGRAPH, TEIXMLGrammar.SECTION)
    self.assertEqual(len(counts.keys()), 2)
    self.assertEqual(counts[introduction_key], 4)
    self.assertEqual(counts[glossary_key], 1)

    # M 4.8: a container unit (LINE) nested below the count unit
    # (PARAGRAPH) is expected to be rejected with IndexError.
    line_xupath = "/".join(
        [TEIXMLGrammar.SECTION, TEIXMLGrammar.PARAGRAPH, BuiltinGrammar.LINE])
    with self.assertRaises(IndexError):
        XUWc.create(line_xupath, [self.tei_data_path1],
                    element_equality_fields, TEIXMLGrammar.PARAGRAPH,
                    BuiltinGrammar.LINE, label_path_delimiter)
# Command-line driver: parse the count/context options plus an xpath and one
# or more files, reject contradictory option pairs, then run the query and
# print the reports.
option_parser = optparse.OptionParser()
option_parser.add_option("-a", "--count", dest="count_type")
option_parser.add_option("-b", "--re_count", dest="count_regexp")
option_parser.add_option("-c", "--context", dest="context_type")
option_parser.add_option("-d", "--re_context", dest="context_regexp")
(options, args) = option_parser.parse_args()

# Need at least the xpath and one file; the usage path exits with status 0.
if len(args) < 2:
    print("Usage xuwc [ --count <count_type> | --re_count <count_regexp> ] [ --context <context_type> | --re_context <context_regexp> ] <xpath> <files>+")
    sys.exit(0)

xpath = args[0]
file_paths = args[1:]

wc_params = {
    'count_type': options.count_type,
    'count_regexp': options.count_regexp,
    'context_type': options.context_type,
    'context_regexp': options.context_regexp,
}

# The type and regexp forms of each option are mutually exclusive.
if wc_params['count_type'] is not None and wc_params['count_regexp'] is not None:
    print("Cannot specify both a count and re_count option.")
    sys.exit(-1)

if wc_params['context_type'] is not None and wc_params['context_regexp'] is not None:
    print("Cannot specify both a context and re_context option.")
    sys.exit(-1)

wc_reports = XUWc.xuwc_main(xpath, file_paths, wc_params)
XUWc.output_reports(wc_reports, wc_params)
def test_xuwc(self):
    """Run XUWc.xuwc_main through the numbered "M" scenarios.

    Each scenario passes an xpath, a list of files and a parameter dict
    (count_type, count_regexp, context_type, context_regexp), then checks
    the returned reports (keyed by file path, then by context region id)
    and the values the test expects to find in the parameter dict after
    the call.

    Checks never rely on the iteration order of the returned mappings.
    """
    tei_path = self.tei_data_path1
    ios_path = self.ios_data_path1

    def run_query(xpath, file_paths, count_type=None, count_regexp=None,
                  context_type=None, context_regexp=None):
        """Call xuwc_main and return (wc_params, wc_reports)."""
        wc_params = {
            'count_type': count_type,
            'count_regexp': count_regexp,
            'context_type': context_type,
            'context_regexp': context_regexp,
        }
        wc_reports = XUWc.xuwc_main(xpath, file_paths, wc_params)
        return wc_params, wc_reports

    def sole_report(wc_reports, file_path, num_context_regions):
        """Assert there is exactly one report, for *file_path*, and return it."""
        self.assertEqual(list(wc_reports.keys()), [file_path])
        wc_report = wc_reports[file_path]
        self.assertEqual(len(wc_report.keys()), num_context_regions)
        return wc_report

    def region_counts(wc_report):
        """Return the per-region counts, sorted so key order is irrelevant."""
        return sorted(wc_report[region_id] for region_id in wc_report.keys())

    # M 1.1: First test out some queries when I specify no options
    wc_params, wc_reports = run_query("//tei:section", [tei_path])
    wc_report = sole_report(wc_reports, tei_path, 1)
    self.assertEqual(wc_params['context_type'], "file_path")
    self.assertEqual(region_counts(wc_report), [2])
    self.assertEqual(wc_params['count_type'], "tei:section")

    # M 1.2
    file_paths = [self.tei_data_path1, self.tei_data_path2]
    wc_params, wc_reports = run_query(
        "//tei:section/tei:subsection/tei:subsubsection"
        "[re:testsubtree('Globus','gi')]",
        file_paths)
    self.assertEqual(sorted(wc_reports.keys()), sorted(file_paths))
    for file_path in file_paths:
        wc_report = wc_reports[file_path]
        self.assertEqual(len(wc_report.keys()), 1)
        self.assertEqual(wc_params['context_type'], "file_path")
        self.assertEqual(region_counts(wc_report), [1])
        self.assertEqual(wc_params['count_type'], "tei:subsubsection")

    # M 2.1 (override count), M 2.3 and M 2.4: same query and context,
    # different builtin count units.
    ios_key_prefix = ios_path.replace('.', '_')
    loopback_id = ios_key_prefix + ".Loopback0"
    gigabit_id = ios_key_prefix + ".GigabitEthernet4/2"
    for count_type, loopback_count, gigabit_count in (
            ("builtin:byte", 186, 232),        # M 2.1
            ("builtin:word", 23, 27),          # M 2.3
            ("builtin:character", 186, 232),   # M 2.4
    ):
        wc_params, wc_reports = run_query(
            "//ios:interface", [ios_path],
            count_type=count_type, context_type="ios:interface")
        wc_report = sole_report(wc_reports, ios_path, 2)
        self.assertEqual(wc_params['context_type'], "ios:interface")
        self.assertEqual(wc_report[loopback_id], loopback_count)
        self.assertEqual(wc_report[gigabit_id], gigabit_count)
        self.assertEqual(wc_params['count_type'], count_type)

    # M 2.5
    # TODO: this one should generate an error, but the call is not
    # exercised yet: xpath "//tei:section" with count_type "tei:subsection".

    # M 2.6
    wc_params, wc_reports = run_query(
        "//tei:section/tei:subsection/tei:subsubsection", [tei_path],
        count_type="tei:subsection")
    wc_report = sole_report(wc_reports, tei_path, 1)
    self.assertEqual(wc_params['context_type'], "file_path")
    self.assertEqual(region_counts(wc_report), [1])
    self.assertEqual(wc_params['count_type'], "tei:subsection")

    # M 3.1
    wc_params, wc_reports = run_query(
        "//tei:section", [tei_path],
        count_regexp=r"\w+", context_type="tei:section")
    wc_report = sole_report(wc_reports, tei_path, 2)
    self.assertEqual(wc_params['context_type'], "tei:section")
    tei_key_prefix = tei_path.replace('.', '_')
    self.assertEqual(wc_report[tei_key_prefix + ".1"], 357)
    self.assertEqual(wc_report[tei_key_prefix + ".9"], 45)
    self.assertEqual(wc_params['count_regexp'], r"\w+")

    # M 4.1 context override
    wc_params, wc_reports = run_query(
        "//tei:section/tei:p", [tei_path], context_type="tei:section")
    wc_report = sole_report(wc_reports, tei_path, 2)
    self.assertEqual(wc_params['context_type'], "tei:section")
    # One section holds 5 paragraphs and the other 1; which key is which
    # is not asserted because the region ids are not spelled out here.
    self.assertEqual(region_counts(wc_report), [1, 5])
    self.assertEqual(wc_params['count_type'], "tei:p")

    # M 4.2
    # TODO: should generate an error, not exercised yet:
    # xpath "//tei:section/tei:p" with context_type "tei:subsection".

    # M 4.3
    # TODO: should generate an error, not exercised yet:
    # xpath "//tei:section/tei:subsection" with context_type
    # "tei:subsubsection".

    # M 4.4
    wc_params, wc_reports = run_query(
        "//tei:section/tei:p", [tei_path], context_type="tei:p")
    wc_report = sole_report(wc_reports, tei_path, 6)
    self.assertEqual(wc_params['context_type'], "tei:p")
    for context_region_id in wc_report.keys():
        self.assertEqual(wc_report[context_region_id], 1)
    self.assertEqual(wc_params['count_type'], "tei:p")

    # M 4.5
    wc_params, wc_reports = run_query(
        "//tei:section/tei:subsection/tei:p", [tei_path],
        context_type="tei:subsection")
    wc_report = sole_report(wc_reports, tei_path, 1)
    self.assertEqual(wc_params['context_type'], "tei:subsection")
    self.assertEqual(region_counts(wc_report), [4])
    self.assertEqual(wc_params['count_type'], "tei:p")

    # M 4.6
    wc_params, wc_reports = run_query(
        "//tei:section/tei:subsection/tei:p", [tei_path],
        context_type="tei:section")
    wc_report = sole_report(wc_reports, tei_path, 1)
    self.assertEqual(wc_params['context_type'], "tei:section")
    self.assertEqual(region_counts(wc_report), [4])
    self.assertEqual(wc_params['count_type'], "tei:p")

    # M 4.7
    wc_params, wc_reports = run_query(
        "//tei:section/tei:p", [tei_path], context_type="tei:section")
    wc_report = sole_report(wc_reports, tei_path, 2)
    self.assertEqual(wc_params['context_type'], "tei:section")
    self.assertEqual(region_counts(wc_report), [1, 5])
    self.assertEqual(wc_params['count_type'], "tei:p")

    # M 4.8 - should explode
    # NOTE(review): nothing here asserts that an exception is raised, so
    # if the call does raise, the test errors instead of passing. Wrap in
    # assertRaises once the expected exception type is confirmed.
    run_query("//tei:section/tei:p", [tei_path],
              context_regexp="builtin:line")