Ejemplo n.º 1
0
 def test_character_name(self):
     """Check that characters resolve to their expected Unicode names.

     Every sample is verified twice: through ``icu.character_name`` with
     the character itself, and through ``character_name_from_code`` with
     the first item returned by ``icu.ord_string`` for that character.
     """
     from calibre.utils.unicode_names import character_name_from_code
     expected_names = (
         ('\U0001f431', 'CAT FACE'),
     )
     for char, expected in expected_names:
         self.ae(icu.character_name(char), expected)
         first_codepoint = icu.ord_string(char)[0]
         self.ae(character_name_from_code(first_codepoint), expected)
Ejemplo n.º 2
0
def add_chars(text, counter, file_name):
    """Add the code points found in ``text`` to ``counter``.

    Falsy ``text`` is ignored. ``bytes`` input is decoded as UTF-8 with
    undecodable sequences dropped. Besides updating the counts, every
    code point seen is associated with ``file_name`` through
    ``counter.chars[codepoint].add(file_name)``.
    """
    if not text:
        return
    if isinstance(text, bytes):
        text = text.decode('utf-8', 'ignore')
    codepoint_counts = Counter(ord_string(text))
    counter.update(codepoint_counts)
    for cp in codepoint_counts:
        counter.chars[cp].add(file_name)
Ejemplo n.º 3
0
 def test_character_name(self):
     """Check that characters resolve to their expected Unicode names.

     Every sample is verified twice: through ``icu.character_name`` with
     the character itself, and through ``character_name_from_code`` with
     the first item returned by ``icu.ord_string`` for that character.
     """
     from calibre.utils.unicode_names import character_name_from_code
     expected_names = (
         ('\U0001f431', 'CAT FACE'),
     )
     for char, expected in expected_names:
         self.ae(icu.character_name(char), expected)
         first_codepoint = icu.ord_string(char)[0]
         self.ae(character_name_from_code(first_codepoint), expected)
Ejemplo n.º 4
0
    def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
        """Record the characters of ``elem`` against the fonts that apply to them.

        The element text is obtained from ``get_element_text``; nothing is
        recorded when it is empty. The code points of the text (minus
        ``exclude_chars``) are added to ``self.font_stats`` under the ``src``
        of every rule returned by ``get_matching_rules`` for the element's
        font. The same is then done for the ``first-letter`` and
        ``first-line`` pseudo-elements whenever ``resolve_pseudo_property``
        (called with ``abort_on_missing=True``) returns a ``font-family``
        for them.

        When ``do_embed`` is true, ``font_usage_map`` and ``font_spec`` are
        updated as well: the map gains/extends an entry keyed on the font
        properties, and every family name not in ``bad_fonts`` is added to
        ``font_spec``.
        """
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
        if not text:
            return

        def update_usage_for_embed(font, chars):
            # Embedding bookkeeping is only wanted when the caller asked for it.
            if not do_embed:
                return
            ff = [icu_lower(x) for x in font.get('font-family', ())]
            if ff and ff[0] not in bad_fonts:
                # Entries are keyed on the font properties, with the family
                # reduced to its lower-cased first name.
                key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.items() if k in font_keys))
                val = font_usage_map.get(key)
                if val is None:
                    val = font_usage_map[key] = {'text': set()}
                    for k in font_keys:
                        val[k] = font[k][0] if k == 'font-family' else font[k]
                val['text'] |= chars
            for ff in font.get('font-family', ()):
                if ff and icu_lower(ff) not in bad_fonts:
                    font_spec.add(ff)

        def record_usage(font, chars):
            # Shared by the element itself and both pseudo-elements.
            update_usage_for_embed(font, chars)
            for rule in get_matching_rules(font_face_rules, font):
                self.font_stats[rule['src']] |= chars

        font = get_font_dict(elem, resolve_property)
        record_usage(font, frozenset(ord_string(text)) - exclude_chars)

        q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
        if q is not None:
            font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
            text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
            m = self.first_letter_pat.search(text.lstrip())
            if m is not None:
                record_usage(font, frozenset(ord_string(m.group())) - exclude_chars)

        q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
        if q is not None:
            # The font must be resolved for the same pseudo-element that was
            # just tested; resolving 'first-letter' here would attribute the
            # first-line text to the wrong font.
            font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-line')
            text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
            record_usage(font, frozenset(ord_string(text)) - exclude_chars)
Ejemplo n.º 5
0
    def get_element_font_usage(self, elem, resolve_property, resolve_pseudo_property, font_face_rules, do_embed, font_usage_map, font_spec):
        """Record the characters of ``elem`` against the fonts that apply to them.

        The element text is obtained from ``get_element_text``; nothing is
        recorded when it is empty. The code points of the text (minus
        ``exclude_chars``) are added to ``self.font_stats`` under the ``src``
        of every rule returned by ``get_matching_rules`` for the element's
        font. The same is then done for the ``first-letter`` and
        ``first-line`` pseudo-elements whenever ``resolve_pseudo_property``
        (called with ``abort_on_missing=True``) returns a ``font-family``
        for them.

        When ``do_embed`` is true, ``font_usage_map`` and ``font_spec`` are
        updated as well: the map gains/extends an entry keyed on the font
        properties, and every family name not in ``bad_fonts`` is added to
        ``font_spec``.
        """
        text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat)
        if not text:
            return

        def update_usage_for_embed(font, chars):
            # Embedding bookkeeping is only wanted when the caller asked for it.
            if not do_embed:
                return
            ff = [icu_lower(x) for x in font.get('font-family', ())]
            if ff and ff[0] not in bad_fonts:
                # Entries are keyed on the font properties, with the family
                # reduced to its lower-cased first name.
                key = frozenset(((k, ff[0] if k == 'font-family' else v) for k, v in font.items() if k in font_keys))
                val = font_usage_map.get(key)
                if val is None:
                    val = font_usage_map[key] = {'text': set()}
                    for k in font_keys:
                        val[k] = font[k][0] if k == 'font-family' else font[k]
                val['text'] |= chars
            for ff in font.get('font-family', ()):
                if ff and icu_lower(ff) not in bad_fonts:
                    font_spec.add(ff)

        def record_usage(font, chars):
            # Shared by the element itself and both pseudo-elements.
            update_usage_for_embed(font, chars)
            for rule in get_matching_rules(font_face_rules, font):
                self.font_stats[rule['src']] |= chars

        font = get_font_dict(elem, resolve_property)
        record_usage(font, frozenset(ord_string(text)) - exclude_chars)

        q = resolve_pseudo_property(elem, 'first-letter', 'font-family', abort_on_missing=True)
        if q is not None:
            font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-letter')
            text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-letter')
            m = self.first_letter_pat.search(text.lstrip())
            if m is not None:
                record_usage(font, frozenset(ord_string(m.group())) - exclude_chars)

        q = resolve_pseudo_property(elem, 'first-line', 'font-family', abort_on_missing=True)
        if q is not None:
            # The font must be resolved for the same pseudo-element that was
            # just tested; resolving 'first-letter' here would attribute the
            # first-line text to the wrong font.
            font = get_font_dict(elem, resolve_pseudo_property, pseudo='first-line')
            text = get_element_text(elem, resolve_property, resolve_pseudo_property, self.capitalize_pat, for_pseudo='first-line')
            record_usage(font, frozenset(ord_string(text)) - exclude_chars)
Ejemplo n.º 6
0
def html_entities():
    """Return a mapping of lower-cased entity name to a set of code points.

    The mapping is built from ``html5_entities`` on the first call and
    then stored on the function object as ``html_entities.ans`` so later
    calls return the same dict. Entries for which a ``TypeError`` is
    raised while extracting the first code point are skipped. The code
    point 0x202F is always added under the name ``nnbsp``.
    """
    cached = getattr(html_entities, 'ans', None)
    if cached is not None:
        return cached

    from calibre.ebooks.html_entities import html5_entities
    by_name = defaultdict(set)
    for entity_name, entity_char in iteritems(html5_entities):
        try:
            # Look the bucket up first, as the combined expression did, so
            # the key exists even when ord_string() raises.
            bucket = by_name[entity_name.lower()]
            bucket.add(ord_string(entity_char)[0])
        except TypeError:
            pass
    by_name['nnbsp'].add(0x202F)
    # Store a plain dict so later lookups of unknown names do not add keys.
    result = dict(by_name)
    html_entities.ans = result
    return result
Ejemplo n.º 7
0
 def update_position(self, line=None, col=None, character=None):
     """Update the label ``self.la`` with the cursor position.

     With ``line`` set to ``None`` the label is cleared. Otherwise it shows
     the translated "Line: ... : ..." text, prefixed by the name of
     ``character`` when a name is available and the
     ``editor_show_char_under_cursor`` preference is enabled. Any error
     raised while looking the name up is swallowed; a tab character falls
     back to the name ``TAB``.
     """
     if line is None:
         self.la.setText('')
         return

     name = None
     try:
         if character and tprefs['editor_show_char_under_cursor']:
             name = character_name_from_code(ord_string(character)[0])
     except Exception:
         # Best effort: the position is still shown without a name.
         name = None

     text = _('Line: {0} : {1}').format(line, col)
     if not name:
         name = {'\t': 'TAB'}.get(character)
     if name and tprefs['editor_show_char_under_cursor']:
         text = name + ' : ' + text
     self.la.setText(text)
Ejemplo n.º 8
0
 def update_position(self, line=None, col=None, character=None):
     """Update the label ``self.la`` with the cursor position.

     With ``line`` set to ``None`` the label is cleared. Otherwise it shows
     the translated "Line: ... : ..." text, prefixed by the name of
     ``character`` when a name is available and the
     ``editor_show_char_under_cursor`` preference is enabled. Any error
     raised while looking the name up is swallowed; a tab character falls
     back to the name ``TAB``.
     """
     if line is None:
         self.la.setText('')
         return

     name = None
     try:
         if character and tprefs['editor_show_char_under_cursor']:
             name = character_name_from_code(ord_string(character)[0])
     except Exception:
         # Best effort: the position is still shown without a name.
         name = None

     text = _('Line: {0} : {1}').format(line, col)
     if not name:
         name = {'\t': 'TAB'}.get(character)
     if name and tprefs['editor_show_char_under_cursor']:
         text = name + ' : ' + text
     self.la.setText(text)
Ejemplo n.º 9
0
def html_entities():
    """Return a mapping of lower-cased entity name to a set of code points.

    The mapping is built from ``html5_entities`` on the first call and
    then stored on the function object as ``html_entities.ans`` so later
    calls return the same dict. Entries for which a ``TypeError`` is
    raised while extracting the first code point are skipped. The code
    point 0x202F is always added under the name ``nnbsp``.
    """
    cached = getattr(html_entities, 'ans', None)
    if cached is not None:
        return cached

    from calibre.ebooks.html_entities import html5_entities
    by_name = defaultdict(set)
    for entity_name, entity_char in iteritems(html5_entities):
        try:
            # Look the bucket up first, as the combined expression did, so
            # the key exists even when ord_string() raises.
            bucket = by_name[entity_name.lower()]
            bucket.add(ord_string(entity_char)[0])
        except TypeError:
            pass
    by_name['nnbsp'].add(0x202F)
    # Store a plain dict so later lookups of unknown names do not add keys.
    result = dict(by_name)
    html_entities.ans = result
    return result
Ejemplo n.º 10
0
def chars_data(container, *args):
    """Yield a ``Char`` record for every code point used in the spine documents.

    Only spine items whose entry in ``container.mime_map`` is in
    ``OEB_DOCS`` are scanned. For each code point the generator yields
    ``Char(index, safe_chr(codepoint), codepoint, files, count)`` where
    ``files`` are the names of the items containing it, sorted by spine
    position (names not in the spine sort last) and then by
    ``numeric_sort_key``, and ``count`` is the total number of occurrences.

    Extra positional arguments are accepted and ignored.
    """
    chars = defaultdict(set)
    counter = Counter()

    for name, is_linear in container.spine_names:
        if container.mime_map.get(name) not in OEB_DOCS:
            continue
        raw = container.raw_data(name)
        counts = Counter(ord_string(raw))
        counter.update(counts)
        for codepoint in counts:
            chars[codepoint].add(name)

    # Spine position of every item, used as the primary sort key.
    nmap = {n: i for i, (n, is_linear) in enumerate(container.spine_names)}

    def sort_key(name):
        return nmap.get(name, len(nmap)), numeric_sort_key(name)

    # .items() rather than the Python-2-only .iteritems().
    for i, (codepoint, usage) in enumerate(chars.items()):
        yield Char(i, safe_chr(codepoint), codepoint, sorted(usage, key=sort_key), counter[codepoint])
Ejemplo n.º 11
0
def chars_data(container, book_locale):
    """Yield a ``Char`` record for every code point used in the spine documents.

    Only spine items whose entry in ``container.mime_map`` is in
    ``OEB_DOCS`` are scanned. For each code point the generator yields
    ``Char(index, safe_chr(codepoint), codepoint, files, count)`` where
    ``files`` are the names of the items containing it, sorted by spine
    position (names not in the spine sort last) and then by
    ``numeric_sort_key``, and ``count`` is the total number of occurrences.

    ``book_locale`` is accepted but not used by this function.
    """
    chars = defaultdict(set)
    counter = Counter()

    for name, is_linear in container.spine_names:
        if container.mime_map.get(name) not in OEB_DOCS:
            continue
        raw = container.raw_data(name)
        counts = Counter(ord_string(raw))
        counter.update(counts)
        for codepoint in counts:
            chars[codepoint].add(name)

    # Spine position of every item, used as the primary sort key.
    nmap = {n: i for i, (n, is_linear) in enumerate(container.spine_names)}

    def sort_key(name):
        return nmap.get(name, len(nmap)), numeric_sort_key(name)

    # .items() rather than the Python-2-only .iteritems().
    for i, (codepoint, usage) in enumerate(chars.items()):
        yield Char(i, safe_chr(codepoint), codepoint, sorted(usage, key=sort_key), counter[codepoint])
Ejemplo n.º 12
0
def map_symbol_text(text, font):
    """Translate ``text`` using the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS`` (a missing key raises
    ``KeyError``). ``bytes`` input is decoded as UTF-8 first. The pieces
    produced by ``do_map`` for the code points of the text are joined
    into a single string.
    """
    symbol_map = SYMBOL_MAPS[font]
    decoded = text.decode('utf-8') if isinstance(text, bytes) else text
    mapped = do_map(symbol_map, ord_string(decoded))
    return ''.join(mapped)
Ejemplo n.º 13
0
def map_symbol_text(text, font):
    """Translate ``text`` using the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS`` (a missing key raises
    ``KeyError``). The pieces produced by ``do_map`` for the code points
    of the text are joined into a single string.
    """
    symbol_map = SYMBOL_MAPS[font]
    codepoints = ord_string(text)
    return ''.join(do_map(symbol_map, codepoints))
Ejemplo n.º 14
0
def map_symbol_text(text, font):
    """Translate ``text`` using the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS`` (a missing key raises
    ``KeyError``). The pieces produced by ``do_map`` for the code points
    of the text are joined into a single string.
    """
    symbol_map = SYMBOL_MAPS[font]
    codepoints = ord_string(text)
    return ''.join(do_map(symbol_map, codepoints))
Ejemplo n.º 15
0
def safe_ord(x):
    """Return the first item ``ord_string`` yields for the text form of ``x``.

    ``x`` is converted with ``unicode_type`` before being handed to
    ``ord_string``.
    """
    as_text = unicode_type(x)
    return ord_string(as_text)[0]
Ejemplo n.º 16
0
        ff = resolve_property(elem, pseudo, 'font-family')
    ans['font-family'] = tuple(x.value for x in ff)
    for p in 'weight', 'style', 'stretch':
        p = 'font-' + p
        rp = resolve_property(elem, p) if pseudo is None else resolve_property(
            elem, pseudo, p)
        ans[p] = str(rp[0].value)
    normalize_font_properties(ans)
    return ans


# Family names that are not treated as real fonts (generic families and
# keywords such as 'inherit').
bad_fonts = {
    'serif',
    'sans-serif',
    'monospace',
    'cursive',
    'fantasy',
    'sansserif',
    'inherit',
}
# Code points of newline, carriage return and tab.
exclude_chars = frozenset(ord_string('\n\r\t'))
# XHTML() applied to each of the listed tag names.
skip_tags = set(map(XHTML, 'script style title meta link'.split()))
# The font properties that together identify a font.
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}


def prepare_font_rule(cssdict):
    """Normalise a font rule dict in place.

    ``font-family`` is replaced by a frozenset holding at most its first
    entry, ``width`` is set from ``widths`` using the ``font-stretch``
    value, and ``weight`` is set to ``font-weight`` converted to ``int``.
    """
    leading_family = cssdict['font-family'][:1]
    cssdict['font-family'] = frozenset(leading_family)
    stretch = cssdict['font-stretch']
    cssdict['width'] = widths[stretch]
    weight = cssdict['font-weight']
    cssdict['weight'] = int(weight)


class StatsCollector:

    first_letter_pat = capitalize_pat = None

    def __init__(self, container, do_embed=False):
Ejemplo n.º 17
0
def map_symbol_text(text, font):
    """Translate ``text`` using the symbol map registered for ``font``.

    ``font`` must be a key of ``SYMBOL_MAPS`` (a missing key raises
    ``KeyError``). ``bytes`` input is decoded as UTF-8 first. The pieces
    produced by ``do_map`` for the code points of the text are joined
    into a single string.
    """
    symbol_map = SYMBOL_MAPS[font]
    decoded = text.decode('utf-8') if isinstance(text, bytes) else text
    mapped = do_map(symbol_map, ord_string(decoded))
    return ''.join(mapped)
Ejemplo n.º 18
0
def get_font_dict(elem, resolve_property, pseudo=None):
    """Collect the font properties of ``elem`` into a dict.

    ``resolve_property`` is called as ``resolve_property(elem, name)``, or
    as ``resolve_property(elem, pseudo, name)`` when ``pseudo`` is given.
    The result maps ``font-family`` to a tuple of the ``.value`` of every
    resolved item, and ``font-weight``, ``font-style`` and ``font-stretch``
    to the text form of the first resolved item's ``.value``. The dict is
    passed to ``normalize_font_properties`` before being returned.
    """
    def lookup(prop):
        # Pseudo-element lookups take the pseudo name as an extra argument.
        if pseudo is None:
            return resolve_property(elem, prop)
        return resolve_property(elem, pseudo, prop)

    families = lookup('font-family')
    ans = {'font-family': tuple(x.value for x in families)}
    for suffix in ('weight', 'style', 'stretch'):
        prop = 'font-' + suffix
        resolved = lookup(prop)
        ans[prop] = type('')(resolved[0].value)
    normalize_font_properties(ans)
    return ans

# Family names that are not treated as real fonts (generic families and
# keywords such as 'inherit').
bad_fonts = {
    'serif',
    'sans-serif',
    'monospace',
    'cursive',
    'fantasy',
    'sansserif',
    'inherit',
}
# Code points of newline, carriage return and tab.
exclude_chars = frozenset(ord_string('\n\r\t'))
# XHTML() applied to each of the listed tag names.
skip_tags = set(map(XHTML, 'script style title meta link'.split()))
# The font properties that together identify a font.
font_keys = {'font-weight', 'font-style', 'font-stretch', 'font-family'}

def prepare_font_rule(cssdict):
    """Normalise a font rule dict in place.

    ``font-family`` is replaced by a frozenset holding at most its first
    entry, ``width`` is set from ``widths`` using the ``font-stretch``
    value, and ``weight`` is set to ``font-weight`` converted to ``int``.
    """
    leading_family = cssdict['font-family'][:1]
    cssdict['font-family'] = frozenset(leading_family)
    stretch = cssdict['font-stretch']
    cssdict['width'] = widths[stretch]
    weight = cssdict['font-weight']
    cssdict['weight'] = int(weight)

class StatsCollector(object):

    first_letter_pat = capitalize_pat = None

    def __init__(self, container, do_embed=False):
        if self.first_letter_pat is None:
            StatsCollector.first_letter_pat = self.first_letter_pat = regex.compile(
Ejemplo n.º 19
0
def safe_ord(x):
    """Return the first item ``ord_string`` yields for ``str(x)``."""
    as_text = str(x)
    return ord_string(as_text)[0]
Ejemplo n.º 20
0
def safe_ord(x):
    """Return the first item ``ord_string`` yields for the text form of ``x``.

    ``x`` is converted with ``unicode_type`` before being handed to
    ``ord_string``.
    """
    as_text = unicode_type(x)
    return ord_string(as_text)[0]