def test_name2unicode_uni_empty_string():
    """Lowercase ``uni20ac`` is decoded to the euro sign.

    The name has a single component. According to the specification it
    should map to an empty string, but lowercase hexadecimal digits are
    deliberately supported as well.
    """
    glyph_name = 'uni20ac'
    expected = '\u20ac'
    assert expected == name2unicode(glyph_name)
def __join_line(self, textline_list):
    """Join text lines into a single cleaned-up string.

    Each element of ``textline_list`` is an iterable of string tokens that
    make up one line. For every line the following clean-up is applied, in
    this order:

    1. Falsy tokens (empty strings, ``None``) are dropped.
    2. Tokens containing ``cid`` (for example ``(cid:404)``) are passed
       through ``encodingdb.name2unicode``.
    3. Tokens that are exactly one of the five basic HTML entities
       (``&amp;``, ``&lt;``, ``&gt;``, ``&quot;``, ``&apos;``) are replaced
       by the character they stand for.
    4. A trailing hyphen (optionally followed by a newline token) is
       removed so a word split across lines is glued back together; a line
       that ends with neither gets a newline token appended.
    5. Every newline token is turned into a single space.

    Args:
        textline_list: iterable of lines, each an iterable of string tokens.

    Returns:
        The concatenation of all cleaned lines as one ``str``.
    """
    # Exact-match entity tokens only; partial matches inside longer tokens
    # are intentionally left untouched. The comparison literals must be the
    # *encoded* entity names, otherwise this step is a no-op.
    entities = {
        '&amp;': '&',
        '&lt;': '<',
        '&gt;': '>',
        '&quot;': '"',
        '&apos;': "'",
    }

    pieces = []
    for line in textline_list:
        tokens = []
        for token in line:
            if not token:
                continue
            # decode cid
            if re.search('cid', token):
                token = encodingdb.name2unicode(token)
            # decode HTML entity
            tokens.append(entities.get(token, token))

        # Hyphenation / line-end handling. Slices are used instead of
        # indexing so that empty and one-token lines need no exception
        # handling; strings are compared with ==, never with `is`.
        if tokens[-1:] == ['\n']:
            if tokens[-2:-1] == ['-']:
                # "...-\n": drop both so the next line continues the word.
                tokens = tokens[:-2]
        elif tokens[-1:] == ['-']:
            # Trailing hyphen without newline: drop the hyphen only.
            tokens = tokens[:-1]
        elif tokens:
            # Ordinary line without an explicit line end: add one.
            tokens.append('\n')

        # Newlines become spaces so the result is one running string.
        pieces.extend(' ' if token == '\n' else token for token in tokens)

    return ''.join(pieces)
def test_name2unicode_u_with_5_digits():
    """``u1040C`` is a single component and decodes to U+1040C."""
    glyph_name = 'u1040C'
    expected = '\U0001040C'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_u_with_4_digits_lowercase():
    """Lowercase ``u013b`` decodes to U+013B.

    The components "Lcommaaccent", "uni013B" and "u013B" all map to the
    string U+013B; this case checks the ``u`` form with lowercase digits.
    """
    glyph_name = 'u013b'
    expected = '\u013B'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_uni_pua_lowercase():
    """Lowercase ``unif6fb`` decodes to U+F6FB.

    "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
    U+F6FB; this case checks the ``uni`` form with lowercase digits.
    """
    glyph_name = 'unif6fb'
    expected = '\uF6FB'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_uni_with_sequence_of_digits_lowercase():
    """Lowercase ``uni20ac0308`` decodes to U+20AC U+0308.

    The name "uni20AC0308" has a single component that carries two
    four-digit code points; here the digits are given in lowercase.
    """
    glyph_name = 'uni20ac0308'
    expected = '\u20AC\u0308'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_uni():
    """``uni013B`` decodes to U+013B.

    The components "Lcommaaccent", "uni013B" and "u013B" all map to the
    string U+013B; this case checks the ``uni`` form.
    """
    glyph_name = 'uni013B'
    expected = '\u013B'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_pua_ogoneksmall():
    """``Ogoneksmall`` decodes to U+F6FB.

    "Ogoneksmall" and "uniF6FB" both map to the string that corresponds to
    U+F6FB; this case checks the named form.
    """
    glyph_name = 'Ogoneksmall'
    expected = '\uF6FB'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_multiple_components_lowercase():
    """A multi-component name with lowercase digits decodes component-wise.

    The name "Lcommaaccent_uni20AC0308_u1040C.alternate" is mapped to the
    string U+013B U+20AC U+0308 U+1040C; here the hexadecimal digits are
    given in lowercase.
    """
    glyph_name = 'Lcommaaccent_uni20ac0308_u1040c.alternate'
    expected = '\u013B\u20AC\u0308\U0001040C'
    assert expected == name2unicode(glyph_name)
def test_name2unicode_name_in_agl():
    """``Lcommaaccent`` decodes to U+013B.

    The name has a single component, which is mapped to the string U+013B
    by AGL.
    """
    glyph_name = 'Lcommaaccent'
    expected = '\u013B'
    assert expected == name2unicode(glyph_name)