def test_camel_case():
    # Each entry pairs an input string with the text un_camel_case is
    # expected to produce for it.
    camel_cased = [
        ("1984ZXSpectrumGames", "1984 ZX Spectrum Games"),
        ("aaAa aaAaA 0aA AAAa!AAA", "aa Aa aa Aa A 0a A AA Aa! AAA"),
        ("MotörHead", "Mot\xf6r Head"),
        ("MSWindows3.11ForWorkgroups", "MS Windows 3.11 For Workgroups"),
    ]

    # This should not significantly affect text that is not camel-cased
    not_camel_cased = [
        ("ACM_Computing_Classification_System",
         "ACM Computing Classification System"),
        ("Anne_Blunt,_15th_Baroness_Wentworth",
         "Anne Blunt, 15th Baroness Wentworth"),
        ("Hindi-Urdu", "Hindi-Urdu"),
    ]

    # Checked in the same order as listed; the first mismatch fails the test.
    for text, expected in camel_cased + not_camel_cased:
        eq_(un_camel_case(text), expected)
def test_camel_case():
    # (input, expected) pairs for un_camel_case, verified in listed order.
    cases = (
        ('1984ZXSpectrumGames', '1984 ZX Spectrum Games'),
        ('aaAa aaAaA 0aA AAAa!AAA', 'aa Aa aa Aa A 0a A AA Aa! AAA'),
        ('MotörHead', 'Mot\xf6r Head'),
        ('MSWindows3.11ForWorkgroups', 'MS Windows 3.11 For Workgroups'),
        # This should not significantly affect text that is not camel-cased
        ('ACM_Computing_Classification_System',
         'ACM Computing Classification System'),
        ('Anne_Blunt,_15th_Baroness_Wentworth',
         'Anne Blunt, 15th Baroness Wentworth'),
        ('Hindi-Urdu', 'Hindi-Urdu'),
    )
    for raw, wanted in cases:
        eq_(un_camel_case(raw), wanted)
def translate_dbpedia_url(url, lang='en'):
    """
    Build a ConceptNet URI for the object identified by a DBPedia URL.

    The part of the URL that names the object is extracted and treated as
    surface text for ConceptNet.

    Strictly speaking this leans on a Semantic Web naming convention rather
    than on anything guaranteed: a URL is not required to be meaningful, and
    the human-readable name is supposed to come from the "name" relation.
    That relation is not unique in either direction, though. One URL may
    carry many names, one name may point at many URLs, and some names are
    just parsing glitches. The URL, by contrast, is stable, so it is what
    the ConceptNet URI is derived from.

    `url` is the DBPedia URL; `lang` is the language code passed as the
    first argument to `normalized_concept_uri` (default 'en').
    """
    topic_name = resource_name(url)
    uri_parts = parse_topic_name(topic_name)

    # Some Semantic Web URLs are camel-cased, whereas ConceptNet URIs
    # separate words with underscores, so the leading piece is un-camel-cased
    # before the URI is assembled. Only the first piece is rewritten; any
    # remaining pieces are passed through unchanged.
    uri_parts[0] = un_camel_case(uri_parts[0])

    return normalized_concept_uri(lang, *uri_parts)
def translate_wp_url(url):
    """
    Turn the final component of a URL into un-camel-cased text.

    The URL is percent-decoded and then decoded as UTF-8 (undecodable bytes
    are dropped). Surrounding slashes are stripped, the text after the last
    '/' is taken, and within that the text after the last '#'. The result
    is passed through `un_camel_case`.
    """
    # Percent-decoding happens before any splitting, so an encoded '/' or
    # '#' in the URL is treated as a delimiter below.
    decoded = urllib.unquote(url).decode('utf-8', 'ignore')

    last_segment = decoded.strip('/').rsplit('/', 1)[-1]
    name = last_segment.rsplit('#', 1)[-1]
    return un_camel_case(name)