Example #1
    def __init__(self, raw, offset, prepend=()):
        list.__init__(self)
        self.extend(prepend)

        count = unpack_from(b'>H', raw, offset)[0]
        offset += 2
        self.pos = offset

        if count > 0:
            self.offset_size = unpack_from(b'>B', raw, offset)[0]
            offset += 1
            if self.offset_size == 3:
                offsets = [unpack(b'>L', b'\0' + raw[i:i+3])[0]
                            for i in range(offset, offset+3*(count+1), 3)]
            else:
                fmt = {1:'B', 2:'H', 4:'L'}[self.offset_size]
                fmt = ('>%d%s'%(count+1, fmt)).encode('ascii')
                offsets = unpack_from(fmt, raw, offset)
            offset += self.offset_size * (count+1) - 1

            for i in range(len(offsets)-1):
                off, noff = offsets[i:i+2]
                obj = raw[offset+off:offset+noff]
                self.append(obj)

            try:
                self.pos = offset + offsets[-1]
            except IndexError:
                self.pos = offset
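The layout parsed here is a standard CFF INDEX: a uint16 object count, a uint8 offset size, count+1 one-based offsets, then the concatenated object data (the trailing "- 1" converts the one-based offsets into absolute positions). A minimal sketch of such a blob, built by hand for illustration; the class itself is not named in this excerpt, so only the input bytes are shown:

from struct import pack

# two objects, b'foo' and b'barbaz', stored with an offset size of 1
raw = pack(b'>HB', 2, 1) + bytes(bytearray([1, 4, 10])) + b'foo' + b'barbaz'
# one-based offsets 1, 4, 10: object i spans raw[base+off:base+noff] in the parser above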
Example #2
    def __init__(self, *args, **kwargs):
        super(CmapTable, self).__init__(*args, **kwargs)

        self.version, self.num_tables = unpack_from(b'>HH', self.raw)

        self.tables = {}

        offset = 4
        sz = calcsize(b'>HHL')
        recs = []
        for i in range(self.num_tables):
            platform, encoding, table_offset = unpack_from(b'>HHL', self.raw,
                    offset)
            offset += sz
            recs.append((platform, encoding, table_offset))

        self.bmp_table = None

        for i in range(len(recs)):
            platform, encoding, offset = recs[i]
            try:
                next_offset = recs[i+1][-1]
            except IndexError:
                next_offset = len(self.raw)
            table = self.raw[offset:next_offset]
            if table:
                fmt = unpack_from(b'>H', table)[0]
                if platform == 3 and encoding == 1 and fmt == 4:
                    self.bmp_table = BMPTable(table)
Example #3
 def get_atoms(self, entry):
     name = '/'.join(('/data', entry.internal, 'atom'))
     if name not in self.entries:
         return ({}, {})
     data = self.get_file(name)
     nentries, data = u32(data), data[4:]
     tags = {}
     for i in range(1, nentries + 1):
         if len(data) <= 1:
             break
         size, data = ord(data[0]), data[1:]
         if size == 0 or len(data) < size:
             break
         tags[i], data = data[:size], data[size:]
     if len(tags) != nentries:
         self._warn("damaged or invalid atoms tag table")
     if len(data) < 4:
         return (tags, {})
     attrs = {}
     nentries, data = u32(data), data[4:]
     for i in range(1, nentries + 1):
         if len(data) <= 4:
             break
         size, data = u32(data), data[4:]
         if size == 0 or len(data) < size:
             break
         attrs[i], data = data[:size], data[size:]
     if len(attrs) != nentries:
         self._warn("damaged or invalid atoms attributes table")
     return (tags, attrs)
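For reference, the tag table read above is a count followed by length-prefixed entries (one size byte per tag, a uint32 size per attribute). A standalone sketch with a stand-in u32 helper; the byte order is assumed little-endian here, the real helper is defined elsewhere in the reader:

import struct

def u32(b):  # stand-in helper, little-endian assumed
    return struct.unpack('<L', b[:4])[0]

data = struct.pack('<L', 2) + b'\x03abc' + b'\x02de'  # two tags: b'abc', b'de'
nentries, data = u32(data), data[4:]
tags = {}
for i in range(1, nentries + 1):
    size, data = data[0], data[1:]
    tags[i], data = data[:size], data[size:]
print(tags)  # {1: b'abc', 2: b'de'}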
Example #4
def test_for_mem_leak():
    from calibre.utils.mem import memory, gc_histogram, diff_hists
    import gc
    gc.disable()
    scanner = DeviceScanner()
    scanner.scan()
    memory()  # load the psutil library
    for i in range(3):
        gc.collect()

    for reps in (1, 10, 100, 1000):
        for i in range(3):
            gc.collect()
        h1 = gc_histogram()
        startmem = memory()
        for i in range(reps):
            scanner.scan()
        for i in range(3):
            gc.collect()
        usedmem = memory(startmem)
        prints('Memory used in %d repetitions of scan(): %.5f KB'%(reps,
            1024*usedmem))
        prints('Differences in python object counts:')
        diff_hists(h1, gc_histogram())
        prints()
Example #5
    def absorb_region(self, region, at):
        if len(region.columns) <= len(self.columns):
            for i in range(len(region.columns)):
                src, dest = region.columns[i], self.columns[i]
                if at != 'bottom':
                    src = reversed(list(iter(src)))
                for elem in src:
                    func = dest.add if at == 'bottom' else dest.prepend
                    func(elem)

        else:
            col_map = {}
            for i, col in enumerate(region.columns):
                max_overlap, max_overlap_index = 0, 0
                for j, dcol in enumerate(self.columns):
                    sint = Interval(col.left, col.right)
                    dint = Interval(dcol.left, dcol.right)
                    width = sint.intersection(dint).width
                    if width > max_overlap:
                        max_overlap = width
                        max_overlap_index = j
                col_map[i] = max_overlap_index
            lines = max(map(len, region.columns))
            if at == 'bottom':
                lines = range(lines)
            else:
                lines = range(lines-1, -1, -1)
            for i in lines:
                for j, src in enumerate(region.columns):
                    dest = self.columns[col_map[j]]
                    if i < len(src):
                        func = dest.add if at == 'bottom' else dest.prepend
                        func(src.elements[i])
Example #6
        def test_mem_leaks(self):
            import gc
            from calibre.utils.mem import get_memory as memory
            m = Matcher(['a'], scorer=CScorer)
            m('a')

            def doit(c):
                m = Matcher([
                    c + 'im/one.gif',
                    c + 'im/two.gif',
                    c + 'text/one.html',
                ],
                            scorer=CScorer)
                m('one')

            start = memory()
            for i in range(10):
                doit(unicode_type(i))
            gc.collect()
            used10 = memory() - start
            start = memory()
            for i in range(100):
                doit(unicode_type(i))
            gc.collect()
            used100 = memory() - start
            if used100 > 0 and used10 > 0:
                self.assertLessEqual(used100, 2 * used10)
Example #7
def _get_next_series_num_for_list(series_indices, unwrap=True):
    from calibre.utils.config_base import tweaks
    from math import ceil, floor
    if not series_indices:
        if isinstance(tweaks['series_index_auto_increment'], numbers.Number):
            return float(tweaks['series_index_auto_increment'])
        return 1.0
    if unwrap:
        series_indices = [x[0] for x in series_indices]
    if tweaks['series_index_auto_increment'] == 'next':
        return floor(series_indices[-1]) + 1
    if tweaks['series_index_auto_increment'] == 'first_free':
        for i in range(1, 10000):
            if i not in series_indices:
                return i
        # really shouldn't get here.
    if tweaks['series_index_auto_increment'] == 'next_free':
        for i in range(int(ceil(series_indices[0])), 10000):
            if i not in series_indices:
                return i
        # really shouldn't get here.
    if tweaks['series_index_auto_increment'] == 'last_free':
        for i in range(int(ceil(series_indices[-1])), 0, -1):
            if i not in series_indices:
                return i
        return series_indices[-1] + 1
    if isinstance(tweaks['series_index_auto_increment'], numbers.Number):
        return float(tweaks['series_index_auto_increment'])
    return 1.0
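As a quick illustration of the 'next_free' policy above, a standalone sketch (the real function reads the policy and the starting value from calibre's tweaks):

from math import ceil

def next_free(series_indices):  # sketch of the 'next_free' branch only
    for i in range(int(ceil(series_indices[0])), 10000):
        if i not in series_indices:
            return i

print(next_free([1.0, 2.0, 2.5, 4.0]))  # 3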
Example #8
def insert_flows_into_markup(parts, flows, mobi8_reader, log):
    mr = mobi8_reader

    # kindle:flow:XXXX?mime=YYYY/ZZZ (used for style sheets, svg images, etc)
    tag_pattern = re.compile(r'''(<[^>]*>)''')
    flow_pattern = re.compile(r'''['"]kindle:flow:([0-9|A-V]+)\?mime=([^'"]+)['"]''', re.IGNORECASE)
    for i in range(len(parts)):
        part = parts[i]

        # flow pattern
        srcpieces = tag_pattern.split(part)
        for j in range(1, len(srcpieces),2):
            tag = srcpieces[j]
            if tag.startswith('<'):
                for m in flow_pattern.finditer(tag):
                    num = int(m.group(1), 32)
                    try:
                        fi = mr.flowinfo[num]
                    except IndexError:
                        log.warn('Ignoring invalid flow reference: %s'%m.group())
                        tag = ''
                    else:
                        if fi.format == 'inline':
                            tag = flows[num]
                        else:
                            replacement = '"../' + fi.dir + '/' + fi.fname + '"'
                            tag = flow_pattern.sub(replacement, tag, 1)
                srcpieces[j] = tag
        part = "".join(srcpieces)
        # store away modified version
        parts[i] = part
Example #9
 def first_visible_row(self):
     geom = self.viewport().geometry()
     for y in range(geom.top(), (self.spacing()*2) + geom.top(), 5):
         for x in range(geom.left(), (self.spacing()*2) + geom.left(), 5):
             ans = self.indexAt(QPoint(x, y)).row()
             if ans > -1:
                 return ans
Example #10
    def __init__(self, mf):
        for x in ('raw', 'palmdb', 'record_headers', 'records', 'mobi_header',
                'huffman_record_nums',):
            setattr(self, x, getattr(mf, x))

        self.index_header = self.index_record = None
        self.indexing_record_nums = set()
        pir = getattr(self.mobi_header, 'primary_index_record', NULL_INDEX)
        if pir != NULL_INDEX:
            self.index_header = IndexHeader(self.records[pir])
            numi = self.index_header.index_count
            self.cncx = CNCX(self.records[
                pir+1+numi:pir+1+numi+self.index_header.num_of_cncx_blocks],
                self.index_header.index_encoding)
            self.index_record = IndexRecord(self.records[pir+1:pir+1+numi],
                    self.index_header, self.cncx)
            self.indexing_record_nums = set(range(pir,
                pir+1+numi+self.index_header.num_of_cncx_blocks))
        self.secondary_index_record = self.secondary_index_header = None
        sir = self.mobi_header.secondary_index_record
        if sir != NULL_INDEX:
            self.secondary_index_header = SecondaryIndexHeader(self.records[sir])
            numi = self.secondary_index_header.index_count
            self.indexing_record_nums.add(sir)
            self.secondary_index_record = IndexRecord(
                    self.records[sir+1:sir+1+numi], self.secondary_index_header, self.cncx)
            self.indexing_record_nums |= set(range(sir+1, sir+1+numi))

        ntr = self.mobi_header.number_of_text_records
        fii = self.mobi_header.first_image_index
        self.text_records = [TextRecord(r, self.records[r],
            self.mobi_header.extra_data_flags, mf.decompress6) for r in range(1,
            min(len(self.records), ntr+1))]
        self.image_records, self.binary_records = [], []
        self.font_records = []
        image_index = 0
        for i in range(self.mobi_header.first_resource_record, min(self.mobi_header.last_resource_record, len(self.records))):
            if i in self.indexing_record_nums or i in self.huffman_record_nums:
                continue
            image_index += 1
            r = self.records[i]
            fmt = None
            if i >= fii and r.raw[:4] not in {b'FLIS', b'FCIS', b'SRCS',
                    b'\xe9\x8e\r\n', b'RESC', b'BOUN', b'FDST', b'DATP',
                    b'AUDI', b'VIDE', b'FONT', b'CRES', b'CONT', b'CMET'}:
                try:
                    fmt = what(None, r.raw)
                except:
                    pass
            if fmt is not None:
                self.image_records.append(ImageRecord(image_index, r, fmt))
            elif r.raw[:4] == b'FONT':
                self.font_records.append(FontRecord(i, r))
            else:
                self.binary_records.append(BinaryRecord(i, r))

        if self.index_record is not None:
            self.tbs_indexing = TBSIndexing(self.text_records,
                    self.index_record.indices, self.mobi_header.type_raw)
Example #11
 def last_visible_row(self):
     geom = self.viewport().geometry()
     for y in range(geom.bottom(), geom.bottom() - 2 * self.spacing(), -5):
         for x in range(geom.left(), (self.spacing()*2) + geom.left(), 5):
             ans = self.indexAt(QPoint(x, y)).row()
             if ans > -1:
                 item_width = self.delegate.item_size.width() + 2*self.spacing()
                 return ans + (geom.width() // item_width)
Example #12
 def get_definitions(self):
     ans = []
     for row in range(0, self.search_replace.rowCount()):
         colItems = []
         for col in range(0, self.search_replace.columnCount()):
             colItems.append(unicode_type(self.search_replace.item(row, col).text()))
         ans.append(colItems)
     return ans
Example #13
def get_metadata(stream, extract_cover=True):
    '''
    Return metadata as a L{MetaInfo} object
    '''
    mi = MetaInformation(_('Unknown'), [_('Unknown')])
    stream.seek(0)

    pheader = PdbHeaderReader(stream)
    section_data = None
    for i in range(1, pheader.num_sections):
        raw_data = pheader.section_data(i)
        section_header = SectionHeader(raw_data)
        if section_header.type == DATATYPE_METADATA:
            section_data = raw_data[8:]
            break

    if not section_data:
        return mi

    default_encoding = 'latin-1'
    record_count, = struct.unpack('>H', section_data[0:2])
    adv = 0
    title = None
    author = None
    pubdate = 0
    for i in range(record_count):
        try:
            type, length = struct.unpack_from('>HH', section_data, 2 + adv)
        except struct.error:
            break

        # CharSet
        if type == 1:
            val, = struct.unpack('>H', section_data[6+adv:8+adv])
            default_encoding = MIBNUM_TO_NAME.get(val, 'latin-1')
        # Author
        elif type == 4:
            author = section_data[6+adv:6+adv+(2*length)]
        # Title
        elif type == 5:
            title = section_data[6+adv:6+adv+(2*length)]
        # Publication Date
        elif type == 6:
            pubdate, = struct.unpack('>I', section_data[6+adv:6+adv+4])

        adv += 2*length

    if title:
        mi.title = title.replace('\0', '').decode(default_encoding, 'replace')
    if author:
        author = author.replace('\0', '').decode(default_encoding, 'replace')
        mi.author = author.split(',')
    mi.pubdate = datetime.fromtimestamp(pubdate)

    return mi
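The metadata section walked above is a uint16 record count followed by records of the form uint16 type, uint16 length, with the loop advancing 2*length payload bytes per record. A minimal sketch of pulling a title out of such a section; the section bytes here are made-up test data:

import struct

title = 'Example\x00'.encode('latin-1')
section = struct.pack('>H', 1) + struct.pack('>HH', 5, len(title) // 2) + title
record_count, = struct.unpack('>H', section[0:2])
rtype, length = struct.unpack_from('>HH', section, 2)
payload = section[6:6 + 2 * length]
print(payload.replace(b'\x00', b'').decode('latin-1'))  # Example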
Example #14
 def as_text(self):
     entries = []
     for i in range(self.dup_list.topLevelItemCount()):
         x = self.dup_list.topLevelItem(i)
         check = '✓' if x.checkState(0) == Qt.Checked else '✗'
         title = '%s %s' % (check, unicode_type(x.text(0)))
         dups = []
         for child in (x.child(j) for j in range(x.childCount())):
             dups.append('\t' + unicode_type(child.text(0)))
         entries.append(title + '\n' + '\n'.join(dups))
     return '\n\n'.join(entries)
Example #15
 def check_for_mem_leak(self):
     import gc
     from calibre.utils.mem import memory
     memory()
     for num in (1, 10, 100):
         start = memory()
         for i in range(num):
             self()
         for i in range(3):
             gc.collect()
         print('Mem consumption increased by:', memory() - start, 'MB', end=' ')
         print('after', num, 'repeats')
Example #16
def test_mem():
    from calibre.utils.mem import memory
    import gc
    gc.collect()
    start_mem = memory()
    raw = P('fonts/liberation/LiberationSerif-Regular.ttf', data=True)
    calls = 1000
    for i in range(calls):
        subset(raw, (), (('a', 'z'),))
    del raw
    for i in range(3):
        gc.collect()
    print('Leaked memory per call:', (memory() - start_mem)/calls*1024, 'KB')
Example #17
 def measure_memory_usage(self, repetitions, func, *args, **kwargs):
     from calibre.utils.mem import memory
     gc.disable()
     try:
         start_mem = memory()
         for i in range(repetitions):
             func(*args, **kwargs)
         for i in range(3):
             gc.collect()
         end_mem = memory()
     finally:
         gc.enable()
     return end_mem - start_mem
Example #18
def get_matching_rules(rules, font):
    matches = []

    # Filter on family
    for rule in reversed(rules):
        ff = frozenset(icu_lower(x) for x in font.get('font-family', []))
        if ff.intersection(rule['font-family']):
            matches.append(rule)
    if not matches:
        return []

    # Filter on font stretch
    width = widths[font.get('font-stretch', 'normal')]

    min_dist = min(abs(width-y['width']) for y in matches)
    nearest = [x for x in matches if abs(width-x['width']) == min_dist]
    if width <= 4:
        lmatches = [f for f in nearest if f['width'] <= width]
    else:
        lmatches = [f for f in nearest if f['width'] >= width]
    matches = (lmatches or nearest)

    # Filter on font-style
    fs = font.get('font-style', 'normal')
    order = {
            'oblique':['oblique', 'italic', 'normal'],
            'normal':['normal', 'oblique', 'italic']
        }.get(fs, ['italic', 'oblique', 'normal'])
    for q in order:
        m = [f for f in matches if f.get('font-style', 'normal') == q]
        if m:
            matches = m
            break

    # Filter on font weight
    fw = int(font.get('font-weight', '400'))
    if fw == 400:
        q = [400, 500, 300, 200, 100, 600, 700, 800, 900]
    elif fw == 500:
        q = [500, 400, 300, 200, 100, 600, 700, 800, 900]
    elif fw < 400:
        q = [fw] + list(range(fw-100, -100, -100)) + list(range(fw+100,
            1000, 100))
    else:
        q = [fw] + list(range(fw+100, 1000, 100)) + list(range(fw-100,
            -100, -100))
    for wt in q:
        m = [f for f in matches if f['weight'] == wt]
        if m:
            return m
    return []
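The font-weight selection above follows the usual CSS-style fallback: the exact weight first, then progressively more distant weights. A small standalone sketch of the candidate order this produces, assuming the ascending run is meant to be range(fw+100, 1000, 100):

def weight_candidates(fw):  # mirrors the weight fallback above
    if fw == 400:
        return [400, 500, 300, 200, 100, 600, 700, 800, 900]
    if fw == 500:
        return [500, 400, 300, 200, 100, 600, 700, 800, 900]
    if fw < 400:
        return [fw] + list(range(fw-100, -100, -100)) + list(range(fw+100, 1000, 100))
    return [fw] + list(range(fw+100, 1000, 100)) + list(range(fw-100, -100, -100))

print(weight_candidates(600))  # [600, 700, 800, 900, 500, 400, 300, 200, 100, 0]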
Example #19
def insert_images_into_markup(parts, resource_map, log):
    # Handle any embedded raster images links in the xhtml text
    # kindle:embed:XXXX?mime=image/gif (png, jpeg, etc) (used for images)
    img_pattern = re.compile(r'''(<[img\s|image\s][^>]*>)''', re.IGNORECASE)
    img_index_pattern = re.compile(r'''[('"]kindle:embed:([0-9|A-V]+)[^')"]*[)'"]''')

    style_pattern = re.compile(r'''(<[a-zA-Z0-9]+\s[^>]*style\s*=\s*[^>]*>)''',
            re.IGNORECASE)

    for i in range(len(parts)):
        part = parts[i]
        srcpieces = img_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if tag.startswith('<im'):
                for m in img_index_pattern.finditer(tag):
                    num = int(m.group(1), 32)
                    href = resource_map[num-1]
                    if href:
                        replacement = '"%s"'%('../' + href)
                        tag = img_index_pattern.sub(replacement, tag, 1)
                    else:
                        log.warn('Referenced image %s was not recognized as '
                                'a valid image in %s' % (num, tag))
                srcpieces[j] = tag
        part = "".join(srcpieces)
        # store away modified version
        parts[i] = part

    # Replace urls used in style attributes
    for i in range(len(parts)):
        part = parts[i]
        srcpieces = style_pattern.split(part)
        for j in range(1, len(srcpieces), 2):
            tag = srcpieces[j]
            if 'kindle:embed' in tag:
                for m in img_index_pattern.finditer(tag):
                    num = int(m.group(1), 32)
                    href = resource_map[num-1]
                    osep = m.group()[0]
                    csep = m.group()[-1]
                    if href:
                        replacement = '%s%s%s'%(osep, '../' + href, csep)
                        tag = img_index_pattern.sub(replacement, tag, 1)
                    else:
                        log.warn('Referenced image %s was not recognized as '
                                'a valid image in %s' % (num, tag))
                srcpieces[j] = tag
        part = "".join(srcpieces)
        # store away modified version
        parts[i] = part
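The kindle:embed indices matched above are base-32 (digits 0-9 and letters A-V) and one-based, which is why the lookup is resource_map[num - 1]. For example:

print(int('0001', 32) - 1)  # 0 -> first entry in resource_map
print(int('000A', 32) - 1)  # 9 -> tenth entry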
Example #20
 def get_state(self):
     h = self.column_header
     cm = self.column_map
     state = {}
     state['hidden_columns'] = [cm[i] for i in range(h.count())
             if h.isSectionHidden(i) and cm[i] != 'ondevice']
     state['column_positions'] = {}
     state['column_sizes'] = {}
     for i in range(h.count()):
         name = cm[i]
         state['column_positions'][name] = h.visualIndex(i)
         if name != 'ondevice':
             state['column_sizes'][name] = h.sectionSize(i)
     return state
Example #21
def generate_test_db(library_path,  # {{{
        num_of_records=20000,
        num_of_authors=6000,
        num_of_tags=10000,
        tag_length=7,
        author_length=7,
        title_length=10,
        max_authors=10,
        max_tags=10
        ):
    import random, string, os, sys, time
    from calibre.constants import preferred_encoding

    if not os.path.exists(library_path):
        os.makedirs(library_path)

    letters = string.letters.decode(preferred_encoding)

    def randstr(length):
        return ''.join(random.choice(letters) for i in
                range(length))

    all_tags = [randstr(tag_length) for j in range(num_of_tags)]
    print('Generated', num_of_tags, 'tags')
    all_authors = [randstr(author_length) for j in range(num_of_authors)]
    print('Generated', num_of_authors, 'authors')
    all_titles = [randstr(title_length) for j in range(num_of_records)]
    print('Generated', num_of_records, 'titles')

    testdb = db(library_path)

    print('Creating', num_of_records, 'records...')

    start = time.time()

    for i, title in enumerate(all_titles):
        print(i+1, end=' ')
        sys.stdout.flush()
        authors = random.randint(1, max_authors)
        authors = [random.choice(all_authors) for i in range(authors)]
        tags = random.randint(0, max_tags)
        tags = [random.choice(all_tags) for i in range(tags)]
        from calibre.ebooks.metadata.book.base import Metadata
        mi = Metadata(title, authors)
        mi.tags = tags
        testdb.import_book(mi, [])

    t = time.time() - start
    print('\nGenerated', num_of_records, 'records in:', t, 'seconds')
    print('Time per record:', t/float(num_of_records))
Example #22
 def number_of_columns(self):
     # Number of columns currently visible in the grid
     if self._ncols is None:
         step = max(10, self.spacing())
         for y in range(step, 500, step):
             for x in range(step, 500, step):
                 i = self.indexAt(QPoint(x, y))
                 if i.isValid():
                     for x in range(self.viewport().width() - step, self.viewport().width() - 300, -step):
                         j = self.indexAt(QPoint(x, y))
                         if j.isValid():
                             self._ncols = j.row() - i.row() + 1
                             return self._ncols
     return self._ncols
Example #23
 def find(self, backwards=False):
     i = self.view.currentIndex().row()
     if i < 0:
         i = 0
     q = icu_lower(unicode_type(self.search.text())).strip()
     if not q:
         return
     r = (range(i-1, -1, -1) if backwards else range(i+1,
         len(self.families)))
     for j in r:
         f = self.families[j]
         if q in icu_lower(f):
             self.set_current(j)
             return
Example #24
 def fset(self, val):
     if self.count() < 2:
         return
     if val == 0 and not self.is_side_index_hidden:
         self.save_state()
     sizes = list(self.sizes())
     for i in range(len(sizes)):
         sizes[i] = val if i == self.side_index else 10
     self.setSizes(sizes)
     total = sum(self.sizes())
     sizes = list(self.sizes())
     for i in range(len(sizes)):
         sizes[i] = val if i == self.side_index else total-val
     self.setSizes(sizes)
     self.initialize()
Example #25
    def test_static_generation(self):  # {{{
        'Test static generation'
        nums = list(map(str, range(10)))

        def handler(conn):
            return conn.generate_static_output('test', nums.pop)
        with TestServer(handler) as server:
            conn = server.connect()
            conn.request('GET', '/an_etagged_path')
            r = conn.getresponse()
            data = r.read()
            for i in range(5):
                conn.request('GET', '/an_etagged_path')
                r = conn.getresponse()
                self.assertEqual(data, r.read())
Example #26
    def line_histogram(self, percent):
        '''
        Creates a broad histogram of the document's line lengths to determine whether it
        uses hard line breaks. Lines are sorted into 20 'buckets' based on length.
        percent is the fraction of lines that must fall into a single bucket for this to
        return True; in typical documents with hard line breaks, most lines land in 1-2 buckets.
        '''
        minLineLength=20  # Ignore lines under 20 chars (typical of spaces)
        maxLineLength=1900  # Discard larger than this to stay in range
        buckets=20  # Each line is divided into a bucket based on length

        # print("there are "+unicode_type(len(lines))+" lines")
        # max = 0
        # for line in self.lines:
        #    l = len(line)
        #    if l > max:
        #        max = l
        # print("max line found is "+unicode_type(max))
        # Build the line length histogram
        hRaw = [0 for i in range(0,buckets)]
        for line in self.lines:
            l = len(line)
            if l > minLineLength and l < maxLineLength:
                l = int(l // 100)
                # print("adding "+unicode_type(l))
                hRaw[l]+=1

        # Normalize the histogram into percents
        totalLines = len(self.lines)
        if totalLines > 0:
            h = [float(count)/totalLines for count in hRaw]
        else:
            h = []
        # print("\nhRaw histogram lengths are: "+unicode_type(hRaw))
        # print("              percents are: "+unicode_type(h)+"\n")

        # Find the biggest bucket
        maxValue = 0
        for i in range(0,len(h)):
            if h[i] > maxValue:
                maxValue = h[i]

        if maxValue < percent:
            # print("Line lengths are too variable. Not unwrapping.")
            return False
        else:
            # print(unicode_type(maxValue)+" of the lines were in one bucket")
            return True
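A condensed standalone sketch of the same bucketing, using a made-up document, to show how hard line breaks concentrate lines into one bucket:

lines = ['x' * 72] * 90 + ['x' * 350] * 10   # hypothetical input
hRaw = [0] * 20
for line in lines:
    l = len(line)
    if 20 < l < 1900:
        hRaw[l // 100] += 1
h = [count / len(lines) for count in hRaw]
print(max(h))  # 0.9, well above a typical percent threshold, so hard breaks are assumed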
Example #27
 def fget(self):
     ftypes = []
     for i in range(self.types.count()):
         i = self.types.item(i)
         if i.checkState() == Qt.Checked:
             ftypes.append(i.data(Qt.UserRole))
     return {'description':self.name.text().strip(), 'trigger':self.trig.text(), 'template':self.template.toPlainText(), 'syntaxes':ftypes}
Example #28
    def apply_recommendations(self, recs):
        '''
        Handle the legacy sr* options that may have been previously saved. They
        are applied only if the new search_replace option has not been set in
        recs.
        '''
        new_val = None
        legacy = {}
        rest = {}
        for name, val in recs.items():
            if name == 'search_replace':
                new_val = val
                if name in getattr(recs, 'disabled_options', []):
                    self.search_replace.setDisabled(True)
            elif name.startswith('sr'):
                legacy[name] = val if val else ''
            else:
                rest[name] = val

        if rest:
            super(SearchAndReplaceWidget, self).apply_recommendations(rest)

        self.set_value(self.opt_search_replace, None)
        if new_val is None and legacy:
            for i in range(1, 4):
                x = 'sr%d'%i
                s, r = x+'_search', x+'_replace'
                s, r = legacy.get(s, ''), legacy.get(r, '')
                if s:
                    self.sr_add_row(s, r)
        if new_val is not None:
            self.set_value(self.opt_search_replace, new_val)
Example #29
 def linearize(self):
     self.elements = []
     for x in self.columns:
         self.elements.extend(x)
     self.boxes = [Box()]
     for i, elem in enumerate(self.elements):
         if isinstance(elem, Image):
             self.boxes.append(ImageBox(elem))
             img = Interval(elem.left, elem.right)
             for j in range(i+1, len(self.elements)):
                 t = self.elements[j]
                 if not isinstance(t, Text):
                     break
                 ti = Interval(t.left, t.right)
                 if not ti.centered_in(img):
                     break
                 self.boxes[-1].append(t)
             self.boxes.append(Box())
         else:
             is_indented = False
             if i+1 < len(self.elements):
                 indent_diff = elem.indent_fraction - \
                     self.elements[i+1].indent_fraction
                 if indent_diff > 0.05:
                     is_indented = True
             if elem.top_gap_ratio > 1.2 or is_indented:
                 self.boxes.append(Box())
             self.boxes[-1].append(elem)
Example #30
def write_font_record(data, obfuscate=True, compress=True):
    '''
    Write the ttf/otf font represented by data into a font record. See
    read_font_record() for details on the format of the record.
    '''

    flags = 0
    key_len = 20
    usize = len(data)
    xor_key = b''
    if compress:
        flags |= 0b1
        data = zlib.compress(data, 9)
    if obfuscate and len(data) >= 1040:
        flags |= 0b10
        xor_key = os.urandom(key_len)
        key = bytearray(xor_key)
        data = bytearray(data)
        for i in range(1040):
            data[i] ^= key[i%key_len]
        data = bytes(data)

    key_start = struct.calcsize(b'>5L') + 4
    data_start = key_start + len(xor_key)

    header = b'FONT' + struct.pack(b'>5L', usize, flags, data_start,
            len(xor_key), key_start)

    return header + xor_key + data
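For reference, a record produced by write_font_record() can be unpacked by reversing the steps above. A minimal sketch (this is not calibre's read_font_record(), whose exact code is not shown here):

import struct, zlib

def decode_font_record(record):
    usize, flags, data_start, key_len, key_start = struct.unpack_from(b'>5L', record, 4)
    data = bytearray(record[data_start:])
    if flags & 0b10:  # undo the XOR obfuscation of the first 1040 bytes
        key = bytearray(record[key_start:key_start + key_len])
        for i in range(min(1040, len(data))):
            data[i] ^= key[i % key_len]
    data = bytes(data)
    if flags & 0b1:   # the payload was zlib-compressed
        data = zlib.decompress(data)
    assert len(data) == usize
    return data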
Example #31
    def __init__(self, gui, initial_panel=None):
        QDialog.__init__(self, gui)
        self.l = l = QGridLayout(self)
        self.setLayout(l)
        self.setWindowTitle(_('Preferences for Edit book'))
        self.setWindowIcon(QIcon(I('config.png')))

        self.stacks = QStackedWidget(self)
        l.addWidget(self.stacks, 0, 1, 1, 1)

        self.categories_list = cl = QListWidget(self)
        cl.currentRowChanged.connect(self.stacks.setCurrentIndex)
        cl.clearPropertyFlags()
        cl.setViewMode(QListView.ViewMode.IconMode)
        cl.setFlow(QListView.Flow.TopToBottom)
        cl.setMovement(QListView.Movement.Static)
        cl.setWrapping(False)
        cl.setSpacing(15)
        if get_lang()[:2] not in ('zh', 'ja'):
            cl.setWordWrap(True)
        l.addWidget(cl, 0, 0, 1, 1)

        self.bb = bb = QDialogButtonBox(QDialogButtonBox.StandardButton.Ok | QDialogButtonBox.StandardButton.Cancel)
        bb.accepted.connect(self.accept)
        bb.rejected.connect(self.reject)
        self.rdb = b = bb.addButton(_('Restore all &defaults'), QDialogButtonBox.ButtonRole.ResetRole)
        b.setToolTip(_('Restore defaults for all preferences'))
        b.clicked.connect(self.restore_all_defaults)
        self.rcdb = b = bb.addButton(_('Restore &current defaults'), QDialogButtonBox.ButtonRole.ResetRole)
        b.setToolTip(_('Restore defaults for currently displayed preferences'))
        b.clicked.connect(self.restore_current_defaults)
        self.rconfs = b = bb.addButton(_('Restore c&onfirmations'), QDialogButtonBox.ButtonRole.ResetRole)
        b.setToolTip(_('Restore all disabled confirmation prompts'))
        b.clicked.connect(self.restore_confirmations)

        l.addWidget(bb, 1, 0, 1, 2)

        self.resize(800, 600)
        geom = tprefs.get('preferences_geom', None)
        if geom is not None:
            QApplication.instance().safe_restore_geometry(self, geom)

        self.keyboard_panel = ShortcutConfig(self)
        self.keyboard_panel.initialize(gui.keyboard)
        self.editor_panel = EditorSettings(self)
        self.integration_panel = IntegrationSettings(self)
        self.main_window_panel = MainWindowSettings(self)
        self.preview_panel = PreviewSettings(self)
        self.toolbars_panel = ToolbarSettings(self)

        for name, icon, panel in [
            (_('Main window'), 'page.png', 'main_window'),
            (_('Editor settings'), 'modified.png', 'editor'),
            (_('Preview settings'), 'viewer.png', 'preview'),
            (_('Keyboard shortcuts'), 'keyboard-prefs.png', 'keyboard'),
            (_('Toolbars'), 'wizard.png', 'toolbars'),
            (_('Integration with calibre'), 'lt.png', 'integration'),
        ]:
            i = QListWidgetItem(QIcon(I(icon)), name, cl)
            i.setToolTip(name)
            cl.addItem(i)
            self.stacks.addWidget(getattr(self, panel + '_panel'))

        cl.setCurrentRow(0)
        cl.item(0).setSelected(True)
        w, h = cl.sizeHintForColumn(0), 0
        for i in range(cl.count()):
            h = cl.sizeHintForRow(i)
            cl.item(i).setSizeHint(QSize(w, h))

        cl.setMaximumWidth(cl.sizeHintForColumn(0) + 35)
        cl.setHorizontalScrollBarPolicy(Qt.ScrollBarPolicy.ScrollBarAlwaysOff)
        cl.setMinimumWidth(min(cl.maximumWidth(), cl.sizeHint().width()))
Example #32
 def accept(self):
     tprefs.set('preferences_geom', bytearray(self.saveGeometry()))
     for i in range(self.stacks.count()):
         w = self.stacks.widget(i)
         w.commit()
     QDialog.accept(self)
Example #33
 def rename_editor(self, editor, name):
     for i in range(self.editor_tabs.count()):
         if self.editor_tabs.widget(i) is editor:
             fname = name.rpartition('/')[2]
             self.editor_tabs.setTabText(i, fname)
             self.editor_tabs.setTabToolTip(i, _('Full path:') + ' ' + name)
Example #34
 def __iter__(self):
     for i in range(self.bookmarks_list.count()):
         yield self.item_to_bm(self.bookmarks_list.item(i))
Example #35
 def redraw_spinners(self):
     m = self.model()
     for r in range(m.rowCount()):
         idx = m.index(r)
         if bool(m.data(idx, Qt.UserRole)):
             m.dataChanged.emit(idx, idx)
Example #36
 def create_from_all_headings(self):
     self.create_from_xpath.emit(['//h:h%d' % i for i in range(1, 7)], True)
Example #37
    def __init__(self, header, stream, log, options):
        self.stream = stream
        self.log = log
        self.options = options

        # Mapping of section uid to our internal
        # list of sections.
        self.uid_section_number = OrderedDict()
        self.uid_text_secion_number = OrderedDict()
        self.uid_text_secion_encoding = {}
        self.uid_image_section_number = {}
        self.uid_composite_image_section_number = {}
        self.metadata_section_number = None
        self.default_encoding = 'latin-1'
        self.owner_id = None
        self.sections = []

        # The Plucker record0 header
        self.header_record = HeaderRecord(header.section_data(0))

        for i in range(1, header.num_sections):
            section_number = len(self.sections)
            # The length of the section header.
            # Where the actual data in the section starts.
            start = 8
            section = None

            raw_data = header.section_data(i)
            # Every section has a section header.
            section_header = SectionHeader(raw_data)

            # Store sections we care about.
            if section_header.type in (DATATYPE_PHTML,
                                       DATATYPE_PHTML_COMPRESSED):
                self.uid_text_secion_number[
                    section_header.uid] = section_number
                section = SectionText(section_header, raw_data[start:])
            elif section_header.type in (DATATYPE_TBMP,
                                         DATATYPE_TBMP_COMPRESSED):
                self.uid_image_section_number[
                    section_header.uid] = section_number
                section = raw_data[start:]
            elif section_header.type == DATATYPE_METADATA:
                self.metadata_section_number = section_number
                section = SectionMetadata(raw_data[start:])
            elif section_header.type == DATATYPE_COMPOSITE_IMAGE:
                self.uid_composite_image_section_number[
                    section_header.uid] = section_number
                section = SectionCompositeImage(raw_data[start:])

            # Store the section.
            if section:
                self.uid_section_number[section_header.uid] = section_number
                self.sections.append((section_header, section))

        # Store useful information from the metadata section locally
        # to make access easier.
        if self.metadata_section_number:
            mdata_section = self.sections[self.metadata_section_number][1]
            for k, v in mdata_section.exceptional_uid_encodings.items():
                self.uid_text_secion_encoding[k] = v
            self.default_encoding = mdata_section.default_encoding
            self.owner_id = mdata_section.owner_id

        # Get the metadata (title, author, ...) with the metadata reader.
        from calibre.ebooks.metadata.pdb import get_metadata
        self.mi = get_metadata(stream, False)
Example #38
 def select_none(self):
     for i in range(self.dup_list.topLevelItemCount()):
         x = self.dup_list.topLevelItem(i)
         x.setCheckState(0, Qt.CheckState.Unchecked)
Example #39
def add_header_footer(manager,
                      opts,
                      pdf_doc,
                      container,
                      page_number_display_map,
                      page_layout,
                      page_margins_map,
                      pdf_metadata,
                      report_progress,
                      toc=None):
    header_template, footer_template = opts.pdf_header_template, opts.pdf_footer_template
    if not footer_template and opts.pdf_page_numbers:
        footer_template = PAGE_NUMBER_TEMPLATE
    if not header_template and not footer_template:
        return
    report_progress(0.8, _('Adding headers and footers'))
    name = create_skeleton(container)
    root = container.parsed(name)
    reset_css = 'margin: 0; padding: 0; border-width: 0; background-color: unset;'
    root.set('style', reset_css)
    body = last_tag(root)
    body.attrib.pop('id', None)
    body.set('style', reset_css)
    job = job_for_name(container, name, Margins(0, 0, 0, 0), page_layout)

    def m(tag_name, text=None, style=None, **attrs):
        ans = root.makeelement(XHTML(tag_name), **attrs)
        if text is not None:
            ans.text = text
        if style is not None:
            style = '; '.join('{}: {}'.format(k, v)
                              for k, v in iteritems(style))
            ans.set('style', style)
        return ans

    justify = 'flex-end'
    if header_template:
        justify = 'space-between' if footer_template else 'flex-start'

    def create_toc_stack(iterator):
        ans = []
        for level, child in iterator:
            pdf_loc = getattr(child, 'pdf_loc', None)
            if pdf_loc is not None and pdf_loc.pagenum > 0:
                ans.append((level, pdf_loc.pagenum, child.title))
        return ans

    def stack_to_map(stack):
        ans = []
        stack_pos = 0
        current, page_for_current, level_for_current = '', -1, -1
        stack_len = len(stack)
        for page in range(1, pdf_doc.page_count() + 1):
            while stack_pos < stack_len:
                level, pagenum, title = stack[stack_pos]
                if pagenum != page:
                    break
                if pagenum != page_for_current or level > level_for_current:
                    page_for_current = pagenum
                    level_for_current = level
                    current = title
                stack_pos += 1
            ans.append(current)
        return ans

    def page_counts_map(iterator):
        pagenums = []
        for level, child in iterator:
            pdf_loc = getattr(child, 'pdf_loc', None)
            if pdf_loc is not None and pdf_loc.pagenum > 0:
                pagenums.append(pdf_loc.pagenum)
        stack = []
        for i, pagenum in enumerate(pagenums):
            next_page_num = pagenums[i + 1] if i + 1 < len(pagenums) else (
                pdf_doc.page_count() + 1)
            stack.append((pagenum, next_page_num - pagenum))
        totals = []
        section_nums = []
        stack_len = len(stack)
        stack_pos = 0
        current, page_for_current, counter = 0, -1, 0
        for page in range(1, pdf_doc.page_count() + 1):
            while stack_pos < stack_len:
                pagenum, pages = stack[stack_pos]
                if pagenum != page:
                    break
                if pagenum != page_for_current:
                    current = pages
                    page_for_current = pagenum
                    counter = 0
                stack_pos += 1
            counter += 1
            totals.append(current)
            section_nums.append(counter)
        return totals, section_nums

    if toc is None:
        page_toc_map = stack_to_map(())
        toplevel_toc_map = stack_to_map(())
        toplevel_pagenum_map, toplevel_pages_map = page_counts_map(())
    else:
        page_toc_map = stack_to_map(
            create_toc_stack(toc.iterdescendants(level=0)))

        def tc():
            for x in toc:
                yield 0, x

        toplevel_toc_map = stack_to_map(create_toc_stack(tc()))
        toplevel_pagenum_map, toplevel_pages_map = page_counts_map(tc())

    def create_container(page_num, margins):
        style = {
            'page-break-inside': 'avoid',
            'page-break-after': 'always',
            'display': 'flex',
            'flex-direction': 'column',
            'height': '100vh',
            'justify-content': justify,
            'margin-left': '{}pt'.format(margins.left),
            'margin-right': '{}pt'.format(margins.right),
            'margin-top': '0',
            'margin-bottom': '0',
            'padding': '0',
            'border-width': '0',
            'overflow': 'hidden',
            'background-color': 'unset',
        }

        ans = m('div', style=style, id='p{}'.format(page_num))
        return ans

    def format_template(template, page_num, height):
        template = template.replace(
            '_TOP_LEVEL_SECTION_PAGES_',
            unicode_type(toplevel_pagenum_map[page_num - 1]))
        template = template.replace(
            '_TOP_LEVEL_SECTION_PAGENUM_',
            unicode_type(toplevel_pages_map[page_num - 1]))
        template = template.replace('_TOTAL_PAGES_',
                                    unicode_type(pages_in_doc))
        template = template.replace(
            '_PAGENUM_', unicode_type(page_number_display_map[page_num]))
        template = template.replace(
            '_TITLE_', prepare_string_for_xml(pdf_metadata.title, True))
        template = template.replace(
            '_AUTHOR_', prepare_string_for_xml(pdf_metadata.author, True))
        template = template.replace(
            '_TOP_LEVEL_SECTION_',
            prepare_string_for_xml(toplevel_toc_map[page_num - 1]))
        template = template.replace(
            '_SECTION_', prepare_string_for_xml(page_toc_map[page_num - 1]))
        troot = parse(template, namespace_elements=True)
        ans = last_tag(troot)[0]
        style = ans.get('style') or ''
        style = (
            'margin: 0; padding: 0; height: {height}pt; border-width: 0;'
            'display: flex; align-items: center; overflow: hidden; background-color: unset;'
        ).format(height=height) + style
        ans.set('style', style)
        for child in ans.xpath('descendant-or-self::*[@class]'):
            cls = frozenset(child.get('class').split())
            q = 'even-page' if page_num % 2 else 'odd-page'
            if q in cls or q.replace('-', '_') in cls:
                style = child.get('style') or ''
                child.set('style', style + '; display: none')
        return ans

    pages_in_doc = pdf_doc.page_count()

    for page_num in range(1, pages_in_doc + 1):
        margins = page_margins_map[page_num - 1]
        div = create_container(page_num, margins)
        body.append(div)
        if header_template:
            div.append(format_template(header_template, page_num, margins.top))
        if footer_template:
            div.append(
                format_template(footer_template, page_num, margins.bottom))

    container.commit()
    # print(open(job[0]).read())
    results = manager.convert_html_files([job], settle_time=1)
    data = results[name]
    if not isinstance(data, bytes):
        raise SystemExit(data)
    # open('/t/impose.pdf', 'wb').write(data)
    doc = data_as_pdf_doc(data)
    first_page_num = pdf_doc.page_count()
    num_pages = doc.page_count()
    if first_page_num != num_pages:
        raise ValueError(
            'The number of header/footers pages ({}) != number of document pages ({})'
            .format(num_pages, first_page_num))
    pdf_doc.append(doc)
    pdf_doc.impose(1, first_page_num + 1, num_pages)
    report_progress(0.9, _('Headers and footers added'))
Example #40
 def __iter__(self):
     for i in range(self.items.count()):
         yield self.items.item(i)
Example #41
    def replace(self, alo, ahi, blo, bhi):
        ''' When replacing one block of lines with another, search the blocks
        for *similar* lines; the best-matching pair (if any) is used as a synch
        point, and intraline difference marking is done on the similar pair.
        Lots of work, but often worth it.  '''
        alo, ahi, blo, bhi = self.trim_identical_leading_lines(
            alo, ahi, blo, bhi)
        if alo == ahi and blo == bhi:
            return
        if ahi + bhi - alo - blo > 100:
            # Too many lines, this will be too slow
            # http://bugs.python.org/issue6931
            return self.do_replace(alo, ahi, blo, bhi)
        # don't synch up unless the lines have a similarity score of at
        # least cutoff; best_ratio tracks the best score seen so far
        best_ratio, cutoff = 0.74, 0.75
        cruncher = SequenceMatcher()
        eqi, eqj = None, None  # 1st indices of equal lines (if any)
        a, b = self.left_lines, self.right_lines

        # search for the pair that matches best without being identical
        # (identical lines must be junk lines, & we don't want to synch up
        # on junk -- unless we have to)
        for j in range(blo, bhi):
            bj = b[j]
            cruncher.set_seq2(bj)
            for i in range(alo, ahi):
                ai = a[i]
                if ai == bj:
                    if eqi is None:
                        eqi, eqj = i, j
                    continue
                cruncher.set_seq1(ai)
                # computing similarity is expensive, so use the quick
                # upper bounds first -- have seen this speed up messy
                # compares by a factor of 3.
                # note that ratio() is only expensive to compute the first
                # time it's called on a sequence pair; the expensive part
                # of the computation is cached by cruncher
                if (cruncher.real_quick_ratio() > best_ratio
                        and cruncher.quick_ratio() > best_ratio
                        and cruncher.ratio() > best_ratio):
                    best_ratio, best_i, best_j = cruncher.ratio(), i, j
        if best_ratio < cutoff:
            # no non-identical "pretty close" pair
            if eqi is None:
                # no identical pair either -- treat it as a straight replace
                self.do_replace(alo, ahi, blo, bhi)
                return
            # no close pair, but an identical pair -- synch up on that
            best_i, best_j, best_ratio = eqi, eqj, 1.0
        else:
            # there's a close pair, so forget the identical pair (if any)
            eqi = None

        # a[best_i] very similar to b[best_j]; eqi is None iff they're not
        # identical

        # pump out diffs from before the synch point
        self.replace_helper(alo, best_i, blo, best_j)

        # do intraline marking on the synch pair
        if eqi is None:
            self.do_replace(best_i, best_i + 1, best_j, best_j + 1)
        else:
            # the synch pair is identical
            self.equal(best_i, best_i + 1, best_j, best_j + 1)

        # pump out diffs from after the synch point
        self.replace_helper(best_i + 1, ahi, best_j + 1, bhi)
Example #42
 def all_files(self):
     return (category.child(i) for category in itervalues(self.categories)
             for i in range(category.childCount()))
Example #43
 def calculate_metrics(self):
     w = self.fontMetrics()
     self.number_width = max(
         map(lambda x: w.width(unicode_type(x)), range(10)))
     self.space_width = w.width(' ')
Example #44
    def spread_gradient(self, gradient, pixel_page_width, pixel_page_height,
                        matrix):
        start = gradient.start()
        stop = gradient.finalStop()
        stops = list(map(lambda x: [x[0], x[1].getRgbF()], gradient.stops()))
        spread = gradient.spread()
        if spread != gradient.PadSpread:
            inv = matrix.inverted()[0]
            page_rect = tuple(map(inv.map, (
                QPointF(0, 0), QPointF(pixel_page_width, 0), QPointF(0, pixel_page_height),
                QPointF(pixel_page_width, pixel_page_height))))
            maxx = maxy = -sys.maxsize-1
            minx = miny = sys.maxsize

            for p in page_rect:
                minx, maxx = min(minx, p.x()), max(maxx, p.x())
                miny, maxy = min(miny, p.y()), max(maxy, p.y())

            def in_page(point):
                return (minx <= point.x() <= maxx and miny <= point.y() <= maxy)

            offset = stop - start
            llimit, rlimit = start, stop

            reflect = False
            base_stops = copy.deepcopy(stops)
            reversed_stops = list(reversed(stops))
            do_reflect = spread == gradient.ReflectSpread
            totl = abs(stops[-1][0] - stops[0][0])
            intervals = [abs(stops[i+1][0] - stops[i][0])/totl
                         for i in range(len(stops)-1)]

            while in_page(llimit):
                reflect ^= True
                llimit -= offset
                estops = reversed_stops if (reflect and do_reflect) else base_stops
                stops = copy.deepcopy(estops) + stops

            first_is_reflected = reflect
            reflect = False

            while in_page(rlimit):
                reflect ^= True
                rlimit += offset
                estops = reversed_stops if (reflect and do_reflect) else base_stops
                stops = stops + copy.deepcopy(estops)

            start, stop = llimit, rlimit

            num = len(stops) // len(base_stops)
            if num > 1:
                # Adjust the stop parameter values
                t = base_stops[0][0]
                rlen = totl/num
                reflect = first_is_reflected ^ True
                intervals = [i*rlen for i in intervals]
                rintervals = list(reversed(intervals))

                for i in range(num):
                    reflect ^= True
                    pos = i * len(base_stops)
                    tvals = [t]
                    for ival in (rintervals if reflect and do_reflect else
                                 intervals):
                        tvals.append(tvals[-1] + ival)
                    for j in range(len(base_stops)):
                        stops[pos+j][0] = tvals[j]
                    t = tvals[-1]

                # In case there were rounding errors
                stops[-1][0] = base_stops[-1][0]

        return start, stop, tuple(Stop(s[0], s[1]) for s in stops)
Example #45
 def editor_modified(self, *args):
     tb = self.editor_tabs.tabBar()
     for i in range(self.editor_tabs.count()):
         editor = self.editor_tabs.widget(i)
         modified = getattr(editor, 'is_modified', False)
         tb.setTabIcon(i, self.modified_icon if modified else QIcon())
Example #46
def convert(opf_path,
            opts,
            metadata=None,
            output_path=None,
            log=default_log,
            cover_data=None,
            report_progress=lambda x, y: None):
    container = Container(opf_path, log)
    fix_markup(container)
    report_progress(0.05, _('Parsed all content for markup transformation'))
    if opts.pdf_hyphenate:
        from calibre.ebooks.oeb.polish.hyphenation import add_soft_hyphens
        add_soft_hyphens(container)
    has_maths = add_maths_script(container)
    fix_fullscreen_images(container)

    name_anchor_map = make_anchors_unique(container, log)
    margin_files = tuple(create_margin_files(container))
    toc = get_toc(container, verify_destinations=False)
    has_toc = toc and len(toc)
    links_page_uuid = add_all_links(container, margin_files)
    container.commit()
    report_progress(0.1, _('Completed markup transformation'))

    manager = RenderManager(opts, log, container.root)
    page_layout = get_page_layout(opts)
    pdf_doc = None
    anchor_locations = {}
    jobs = []
    for margin_file in margin_files:
        jobs.append(
            job_for_name(container, margin_file.name, margin_file.margins,
                         page_layout))
    results = manager.convert_html_files(jobs,
                                         settle_time=1,
                                         has_maths=has_maths)
    num_pages = 0
    page_margins_map = []
    for margin_file in margin_files:
        name = margin_file.name
        data = results[name]
        if not isinstance(data, bytes):
            raise SystemExit(data)
        doc = data_as_pdf_doc(data)
        anchor_locations.update(
            get_anchor_locations(name, doc, num_pages + 1, links_page_uuid,
                                 log))
        doc_pages = doc.page_count()
        page_margins_map.extend(
            repeat(resolve_margins(margin_file.margins, page_layout),
                   doc_pages))
        num_pages += doc_pages

        if pdf_doc is None:
            pdf_doc = doc
        else:
            pdf_doc.append(doc)

    page_number_display_map = get_page_number_display_map(
        manager, opts, num_pages, log)

    if has_toc:
        annotate_toc(toc, anchor_locations, name_anchor_map, log)
        if opts.pdf_add_toc:
            tocname = create_skeleton(container)
            root = container.parsed(tocname)
            add_pagenum_toc(root, toc, opts, page_number_display_map)
            container.commit()
            jobs = [job_for_name(container, tocname, None, page_layout)]
            results = manager.convert_html_files(jobs, settle_time=1)
            tocdoc = data_as_pdf_doc(results[tocname])
            page_margins_map.extend(
                repeat(resolve_margins(None, page_layout),
                       tocdoc.page_count()))
            pdf_doc.append(tocdoc)

    report_progress(0.7, _('Rendered all HTML as PDF'))

    fix_links(pdf_doc, anchor_locations, name_anchor_map, opts.pdf_mark_links,
              log)
    if toc and len(toc):
        add_toc(PDFOutlineRoot(pdf_doc), toc)
    report_progress(0.75, _('Added links to PDF content'))

    pdf_metadata = PDFMetadata(metadata)
    add_header_footer(manager, opts, pdf_doc, container,
                      page_number_display_map, page_layout, page_margins_map,
                      pdf_metadata, report_progress, toc if has_toc else None)

    num_removed = remove_unused_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'unused fonts')

    merge_fonts(pdf_doc, log)
    num_removed = dedup_type3_fonts(pdf_doc)
    if num_removed:
        log('Removed', num_removed, 'duplicated Type3 glyphs')

    num_removed = pdf_doc.dedup_images()
    if num_removed:
        log('Removed', num_removed, 'duplicate images')

    if opts.pdf_odd_even_offset:
        for i in range(1, pdf_doc.page_count()):
            margins = page_margins_map[i]
            mult = -1 if i % 2 else 1
            val = opts.pdf_odd_even_offset
            if abs(val) < min(margins.left, margins.right):
                box = list(pdf_doc.get_page_box("CropBox", i))
                box[0] += val * mult
                pdf_doc.set_page_box("CropBox", i, *box)

    if cover_data:
        add_cover(pdf_doc, cover_data, page_layout, opts)

    if metadata is not None:
        update_metadata(pdf_doc, pdf_metadata)
    report_progress(1, _('Updated metadata in PDF'))

    if opts.uncompressed_pdf:
        pdf_doc.uncompress()

    pdf_data = pdf_doc.write()
    if output_path is None:
        return pdf_data
    with open(output_path, 'wb') as f:
        f.write(pdf_data)
Beispiel #47
0
 def iteritems(self):
     root = self.invisibleRootItem()
     for i in range(root.childCount()):
         sec = root.child(i)
         for k in range(sec.childCount()):
             yield sec.child(k)
Beispiel #48
0
    def used_font(self, style):
        '''
        Given a style find the embedded font that matches it. Returns None if
        no match is found (can happen if no family matches).
        '''
        ff = style.get('font-family', [])
        lnames = {unicode_type(x).lower() for x in ff}
        matching_set = []

        # Filter on font-family
        for ef in self.embedded_fonts:
            flnames = {x.lower() for x in ef.get('font-family', [])}
            if not lnames.intersection(flnames):
                continue
            matching_set.append(ef)
        if not matching_set:
            return None

        # Filter on font-stretch
        widths = {
            x: i
            for i, x in enumerate(('ultra-condensed', 'extra-condensed',
                                   'condensed', 'semi-condensed', 'normal',
                                   'semi-expanded', 'expanded',
                                   'extra-expanded', 'ultra-expanded'))
        }

        width = widths[style.get('font-stretch', 'normal')]
        for f in matching_set:
            f['width'] = widths[f.get('font-stretch', 'normal')]

        min_dist = min(abs(width - f['width']) for f in matching_set)
        nearest = [
            f for f in matching_set if abs(width - f['width']) == min_dist
        ]
        if width <= 4:
            lmatches = [f for f in nearest if f['width'] <= width]
        else:
            lmatches = [f for f in nearest if f['width'] >= width]
        matching_set = (lmatches or nearest)

        # Filter on font-style
        fs = style.get('font-style', 'normal')
        order = {
            'oblique': ['oblique', 'italic', 'normal'],
            'normal': ['normal', 'oblique', 'italic']
        }.get(fs, ['italic', 'oblique', 'normal'])
        for q in order:
            matches = [
                f for f in matching_set if f.get('font-style', 'normal') == q
            ]
            if matches:
                matching_set = matches
                break

        # Filter on font weight
        fw = int(style.get('font-weight', '400'))
        if fw == 400:
            q = [400, 500, 300, 200, 100, 600, 700, 800, 900]
        elif fw == 500:
            q = [500, 400, 300, 200, 100, 600, 700, 800, 900]
        elif fw < 400:
            q = [fw] + list(range(fw - 100, -100, -100)) + list(
                range(fw + 100, 1000, 100))
        else:
            q = [fw] + list(range(fw + 100, 1000, 100)) + list(
                range(fw - 100, -100, -100))
        for wt in q:
            matches = [f for f in matching_set if f['weight'] == wt]
            if matches:
                return matches[0]
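

# A minimal illustration (not part of the original snippet): the final filter
# above follows the CSS 2.1 font-weight fallback rules. Assuming the range
# arguments as written above, the helper below reproduces that ordering so it
# can be inspected on its own, e.g.
# weight_fallback_order(300) -> [300, 200, 100, 0, 400, 500, 600, 700, 800, 900]
def weight_fallback_order(fw):
    if fw == 400:
        return [400, 500, 300, 200, 100, 600, 700, 800, 900]
    if fw == 500:
        return [500, 400, 300, 200, 100, 600, 700, 800, 900]
    if fw < 400:
        # Prefer lighter weights first, then fall back to heavier ones
        return [fw] + list(range(fw - 100, -100, -100)) + list(range(fw + 100, 1000, 100))
    # Prefer heavier weights first, then fall back to lighter ones
    return [fw] + list(range(fw + 100, 1000, 100)) + list(range(fw - 100, -100, -100))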
Beispiel #49
0
 def restore_all_defaults(self):
     for i in range(self.stacks.count()):
         w = self.stacks.widget(i)
         w.restore_defaults()
Beispiel #50
0
def convert_day_time_schedule(val):
    day_of_week, hour, minute = val
    if day_of_week == -1:
        return (tuple(range(7)), hour, minute)
    return ((day_of_week,), hour, minute)
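

# For illustration (not part of the original snippet): a day_of_week of -1
# means "every day", so (-1, 8, 30) expands to ((0, 1, 2, 3, 4, 5, 6), 8, 30),
# while (2, 8, 30) is returned unchanged as ((2,), 8, 30).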
Beispiel #51
0
 def getter(w):
     return list(map(unicode_type, (w.item(i).text() for i in range(w.count()))))
Beispiel #52
0
import struct
import zlib


def read_font_record(data, extent=1040):
    '''
    Return the font encoded in the MOBI FONT record represented by data.
    The return value is a dict with fields raw_data, font_data, err, ext,
    headers.

    :param extent: The number of obfuscated bytes. So far I have only
        encountered files with 1040 obfuscated bytes. If you encounter an
        obfuscated record for which this function fails, try different extent
        values (easily automated).

    raw_data is the raw data in the font record
    font_data is the decoded font_data or None if an error occurred
    err is not None if some error occurred
    ext is the font type (ttf for TrueType, otf for OpenType, dat for unknown
    and failed if an error occurred)
    headers is the list of decoded headers from the font record or None if
    decoding failed
    '''
    # Format:
    # bytes  0 -  3:  'FONT'
    # bytes  4 -  7:  Uncompressed size
    # bytes  8 - 11:  flags
    #                   bit 1 - zlib compression
    #                   bit 2 - XOR obfuscated
    # bytes 12 - 15:  offset to start of compressed data
    # bytes 16 - 19:  length of XOR string
    # bytes 20 - 23:  offset to start of XOR data
    # The zlib compressed data begins with 2 bytes of header and
    # has 4 bytes of checksum at the end
    ans = {'raw_data':data, 'font_data':None, 'err':None, 'ext':'failed',
            'headers':None, 'encrypted':False}

    try:
        usize, flags, dstart, xor_len, xor_start = struct.unpack_from(
                b'>LLLLL', data, 4)
    except Exception:
        ans['err'] = 'Failed to read font record header fields'
        return ans
    font_data = data[dstart:]
    ans['headers'] = {'usize':usize, 'flags':bin(flags), 'xor_len':xor_len,
            'xor_start':xor_start, 'dstart':dstart}

    if flags & 0b10:
        # De-obfuscate the data
        key = bytearray(data[xor_start:xor_start+xor_len])
        buf = bytearray(font_data)
        extent = len(font_data) if extent is None else extent
        extent = min(extent, len(font_data))

        for n in range(extent):
            buf[n] ^= key[n%xor_len]  # XOR of buf and key

        font_data = bytes(buf)
        ans['encrypted'] = True

    if flags & 0b1:
        # ZLIB compressed data
        try:
            font_data = zlib.decompress(font_data)
        except Exception as e:
            ans['err'] = 'Failed to zlib decompress font data (%s)'%e
            return ans

        if len(font_data) != usize:
            ans['err'] = 'Uncompressed font size mismatch'
            return ans

    ans['font_data'] = font_data
    sig = font_data[:4]
    ans['ext'] = ('ttf' if sig in {b'\0\1\0\0', b'true', b'ttcf'}
                    else 'otf' if sig == b'OTTO' else 'dat')

    return ans
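

# A hedged usage sketch (not from the original source): decode a single MOBI
# FONT record and write the embedded font to disk under a guessed extension.
# `record_bytes` and `out_base` are placeholder names, not part of the API.
def extract_font(record_bytes, out_base='embedded_font'):
    rec = read_font_record(record_bytes)
    if rec['err'] is not None or rec['font_data'] is None:
        raise ValueError('Could not decode font record: %s' % rec['err'])
    path = '%s.%s' % (out_base, rec['ext'])
    with open(path, 'wb') as f:
        f.write(rec['font_data'])
    return path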
Beispiel #53
0
    def genesis(self):
        self.prev_lname = self.last_lname = ''
        self.count_changed(0)
        self.action_choose = self.menuless_qaction
        self.action_exim = ac = QAction(_('Export/import all calibre data'), self.gui)
        ac.triggered.connect(self.exim_data)

        self.stats = LibraryUsageStats()
        self.popup_type = (QToolButton.InstantPopup if len(self.stats.stats) > 1 else
                QToolButton.MenuButtonPopup)
        if len(self.stats.stats) > 1:
            self.action_choose.triggered.connect(self.choose_library)
        else:
            self.qaction.triggered.connect(self.choose_library)

        self.choose_menu = self.qaction.menu()

        ac = self.create_action(spec=(_('Pick a random book'), 'random.png',
            None, None), attr='action_pick_random')
        ac.triggered.connect(self.pick_random)

        if not os.environ.get('CALIBRE_OVERRIDE_DATABASE_PATH', None):
            self.choose_menu.addAction(self.action_choose)

            self.quick_menu = QMenu(_('Quick switch'))
            self.quick_menu_action = self.choose_menu.addMenu(self.quick_menu)
            self.rename_menu = QMenu(_('Rename library'))
            self.rename_menu_action = self.choose_menu.addMenu(self.rename_menu)
            self.choose_menu.addAction(ac)
            self.delete_menu = QMenu(_('Remove library'))
            self.delete_menu_action = self.choose_menu.addMenu(self.delete_menu)
            self.choose_menu.addAction(self.action_exim)
        else:
            self.choose_menu.addAction(ac)

        self.rename_separator = self.choose_menu.addSeparator()

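        # Pre-create five hidden placeholder actions; when triggered, each one
        # switches to the library at the matching index in self.qs_locations.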
        self.switch_actions = []
        for i in range(5):
            ac = self.create_action(spec=('', None, None, None),
                    attr='switch_action%d'%i)
            ac.setObjectName(str(i))
            self.switch_actions.append(ac)
            ac.setVisible(False)
            connect_lambda(ac.triggered, self, lambda self:
                    self.switch_requested(self.qs_locations[int(self.gui.sender().objectName())]),
                    type=Qt.QueuedConnection)
            self.choose_menu.addAction(ac)

        self.rename_separator = self.choose_menu.addSeparator()

        self.maintenance_menu = QMenu(_('Library maintenance'))
        ac = self.create_action(spec=(_('Library metadata backup status'),
                        'lt.png', None, None), attr='action_backup_status')
        ac.triggered.connect(self.backup_status, type=Qt.QueuedConnection)
        self.maintenance_menu.addAction(ac)
        ac = self.create_action(spec=(_('Check library'), 'lt.png',
                                      None, None), attr='action_check_library')
        ac.triggered.connect(self.check_library, type=Qt.QueuedConnection)
        self.maintenance_menu.addAction(ac)
        ac = self.create_action(spec=(_('Restore database'), 'lt.png',
                                      None, None),
                                      attr='action_restore_database')
        ac.triggered.connect(self.restore_database, type=Qt.QueuedConnection)
        self.maintenance_menu.addAction(ac)

        self.choose_menu.addMenu(self.maintenance_menu)
        self.view_state_map = {}
        self.restore_view_state.connect(self._restore_view_state,
                type=Qt.QueuedConnection)
Beispiel #54
0
import re

from mechanize import URLError

from calibre.ebooks.metadata.book.base import Metadata
from calibre import browser
from calibre.ebooks.BeautifulSoup import BeautifulSoup
from calibre.ebooks.chardet import xml_to_unicode
from polyglot.builtins import codepoint_to_chr, unicode_type, range
from polyglot.urllib import parse_qs, quote_plus

URL = \
"http://ww2.kdl.org/libcat/WhatsNext.asp?AuthorLastName={0}&AuthorFirstName=&SeriesName=&BookTitle={1}&CategoryID=0&cmdSearch=Search&Search=1&grouping="

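# Leading characters (straight and curly quotes, primes) that are stripped
# from the start of a title before searching.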
_ignore_starts = '\'"' + ''.join(
    codepoint_to_chr(x)
    for x in list(range(0x2018, 0x201e)) + [0x2032, 0x2033])


def get_series(title, authors, timeout=60):
    mi = Metadata(title, authors)
    if title and title[0] in _ignore_starts:
        title = title[1:]
    title = re.sub(r'^(A|The|An)\s+', '', title).strip()
    if not title:
        return mi
    if isinstance(title, unicode_type):
        title = title.encode('utf-8')

    title = quote_plus(title)

    author = authors[0].strip()
Beispiel #55
0
 def index_for_group(self, name):
     for i in range(self.rowCount()):
         node = self.data[i]
         if node.data == name:
             return self.index(i, 0)
Beispiel #56
0
 def group_names(self):
     for i in range(self.rowCount()):
         node = self.data[i]
         yield node.data
Beispiel #57
0
def osx_version():
    if isosx:
        import platform
        src = platform.mac_ver()[0]
        m = re.match(r'(\d+)\.(\d+)\.(\d+)', src)
        if m:
            return int(m.group(1)), int(m.group(2)), int(m.group(3))


def confirm_config_name(name):
    return name + '_again'


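# Characters that are invalid in file names on Windows, macOS or Linux, plus
# all ASCII control characters.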
_filename_sanitize_unicode = frozenset((u'\\', u'|', u'?', u'*', u'<',
    u'"', u':', u'>', u'+', u'/') + tuple(map(codepoint_to_chr, range(32))))


def sanitize_file_name(name, substitute=u'_'):
    '''
    Sanitize the filename `name`. All invalid characters are replaced by `substitute`.
    The set of invalid characters is the union of the invalid characters in Windows,
    macOS and Linux. Also removes leading and trailing whitespace.
    **WARNING:** This function also replaces path separators, so only pass file names
    and not full paths to it.
    '''
    if isbytestring(name):
        name = name.decode(filesystem_encoding, 'replace')
    if isbytestring(substitute):
        substitute = substitute.decode(filesystem_encoding, 'replace')
    chars = (substitute if c in _filename_sanitize_unicode else c for c in name)
Beispiel #58
0
from calibre.gui2.webengine import secure_webengine
from calibre.srv.render_book import check_for_maths
from calibre.utils.fonts.sfnt.container import Sfnt, UnsupportedFont
from calibre.utils.fonts.sfnt.errors import NoGlyphs
from calibre.utils.fonts.sfnt.merge import merge_truetype_fonts_for_pdf
from calibre.utils.fonts.sfnt.subset import pdf_subset
from calibre.utils.logging import default_log
from calibre.utils.monotonic import monotonic
from calibre.utils.podofo import (dedup_type3_fonts, get_podofo,
                                  remove_unused_fonts,
                                  set_metadata_implementation)
from calibre.utils.short_uuid import uuid4
from polyglot.builtins import filter, iteritems, map, range, unicode_type
from polyglot.urllib import urlparse

OK, KILL_SIGNAL = range(0, 2)
HANG_TIME = 60  # seconds

# }}}


# Utils {{{
def data_as_pdf_doc(data):
    podofo = get_podofo()
    ans = podofo.PDFDoc()
    ans.load(data)
    return ans


def preprint_js():
    ans = getattr(preprint_js, 'ans', None)
Beispiel #59
0
 def duplicates(self):
     for i in range(self.dup_list.topLevelItemCount()):
         x = self.dup_list.topLevelItem(i)
         if x.checkState(0) == Qt.CheckState.Checked:
             yield x.data(0, Qt.ItemDataRole.UserRole)
Beispiel #60
0
import socket

from calibre.srv.errors import JobQueueFull
from calibre.srv.pool import ThreadPool, PluginPool
from calibre.srv.opts import Options
from calibre.srv.jobs import JobsManager
from calibre.srv.utils import (
    socket_errors_socket_closed, socket_errors_nonblocking, HandleInterrupt,
    socket_errors_eintr, start_cork, stop_cork, DESIRED_SEND_BUFFER_SIZE,
    create_sock_pair)
from calibre.utils.socket_inheritance import set_socket_inherit
from calibre.utils.logging import ThreadSafeLog
from calibre.utils.monotonic import monotonic
from calibre.utils.mdns import get_external_ip
from polyglot.builtins import range

READ, WRITE, RDWR, WAIT = 'READ', 'WRITE', 'RDWR', 'WAIT'
WAKEUP, JOB_DONE = bytes(bytearray(range(2)))
IPPROTO_IPV6 = getattr(socket, "IPPROTO_IPV6", 41)


class ReadBuffer(object):  # {{{

    ' A ring buffer used to speed up the readline() implementation by minimizing recv() calls '

    __slots__ = ('ba', 'buf', 'read_pos', 'write_pos', 'full_state')

    def __init__(self, size=4096):
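        # read_pos/write_pos are offsets into the ring buffer; full_state
        # records which operation is permitted when the two positions coincide,
        # distinguishing an empty buffer (WRITE) from a full one (READ).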
        self.ba = bytearray(size)
        self.buf = memoryview(self.ba)
        self.read_pos = 0
        self.write_pos = 0
        self.full_state = WRITE