Example #1
File: exif.py Project: quru/qis
def _parse_user_comment(val):
    """
    Interprets a UserComment field value, supplied either as the raw value
    or as a string containing comma-separated byte codes.
    """
    # Use the same data type conversion as the parsing for field type 7
    if CHAR_LIST_REGEX.match(val):
        val = [parse_int(num) for num in val.split(',')]
    # The first 8 bytes give the string encoding - ASCII, JIS, or Unicode
    encoding_char_1 = val[0:1]
    val = val[8:]
    # Decode the rest
    if (type(val) != list):
        # val was already a string
        return val
    else:
        # val is a list of byte codes
        encoding_char_1 = _safe_string(encoding_char_1)
        if encoding_char_1 == '' or encoding_char_1 == 'A':
            # ASCII
            return _safe_string(val)
        elif encoding_char_1 == 'J':
            # JIS
            return unicode(str(bytearray(val)), 'SJIS')
        else:
            # Unicode
            return unicode(str(bytearray(val)))
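For reference, the UserComment layout handled above is an 8-byte encoding identifier ("ASCII\0\0\0", "JIS\0\0\0\0\0", "UNICODE\0", or eight null bytes for undefined) followed by the payload. A minimal standalone sketch of the same idea, using plain bytes and none of the project's helpers (_safe_string, CHAR_LIST_REGEX, parse_int):

def decode_user_comment(raw):
    """Decode an EXIF UserComment given as raw bytes (standalone sketch)."""
    prefix, payload = raw[:8], raw[8:]
    if prefix.startswith(b'ASCII'):
        return payload.decode('ascii', errors='replace')
    if prefix.startswith(b'JIS'):
        return payload.decode('shift_jis', errors='replace')
    if prefix.startswith(b'UNICODE'):
        # EXIF does not pin down the UTF-16 byte order; little-endian is assumed here.
        return payload.decode('utf-16-le', errors='replace')
    # Undefined encoding (eight 0x00 bytes): fall back to ASCII.
    return payload.decode('ascii', errors='replace')

# e.g. decode_user_comment(b'ASCII\x00\x00\x00Hello') == 'Hello'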
Example #2
def listen(self, tail=''):
    parts = (tail + ' x x').split()[0:2]
    nap_count = util.parse_int(parts[0], 30)
    nap_time = util.parse_float(parts[1], 1)
    for i in range(0, nap_count):
        log.i([chr(b) for b in self.transport.peek()])
        time.sleep(nap_time)
Example #3
def handle_message(self, event, query):
  number = parse_int(query)
  if not number:
    response = requests.get(_XKCD_API_CURRENT_URL)
    response.raise_for_status()
    number = response.json()['num']
  return _XKCD_URL_TEMPLATE % number
Example #4
def handle_message(self, event, query):
  times = parse_int(query, default=1)
  dice_gen = (random.randrange(1, 7) for _ in xrange(times))
  if times > 10:
    counter = Counter(dice_gen)
    return ', '.join('%s: %s' % (k, counter[k]) for k in xrange(1, 7))
  else:
    return ', '.join(map(str, dice_gen))
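The call sites in Examples #2-#4 suggest a defensive parser with an optional default: parse_int(query), parse_int(query, default=1), util.parse_int(parts[0], 30). A sketch consistent with those call sites (an assumption only; the real helpers in these projects may behave differently):

def parse_int(value, default=0):
    """Return int(value), or `default` if the value cannot be parsed (sketch)."""
    try:
        return int(value)
    except (TypeError, ValueError):
        return default

# parse_int('42') -> 42, parse_int('oops', default=1) -> 1, parse_int(None) -> 0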
Example #5
def get_port(request):
    """
    Returns the port number in use on a Flask/Werkzeug request object.
    """
    sep_idx = request.host.find(':')
    if sep_idx == -1:
        return 443 if request.is_secure else 80
    else:
        return parse_int(request.host[sep_idx + 1:])
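A standalone illustration of the same host/port logic, operating on a plain 'host' or 'host:port' string rather than a Flask request (sketch only):

def port_from_host(host, is_secure=False):
    """Extract the port from a 'host' or 'host:port' string."""
    name, sep, port = host.partition(':')
    if not sep:
        return 443 if is_secure else 80
    return int(port)

# port_from_host('example.com:8080') -> 8080
# port_from_host('example.com', is_secure=True) -> 443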
Example #6
File: exif.py Project: quru/qis
def _get_prop_val(prop_handlers, prop_val):
    """
    Returns the string value of prop_val, as determined by the handlers in
    prop_handlers (use _get_prop_handlers to obtain this object)
    """
    try:
        (_, field_type, print_fn, field_options) = prop_handlers

        # Check for empty values
        if not prop_val:
            return ''

        # Use custom handler if one is set
        if print_fn:
            return print_fn(prop_val)

        if field_type:
            if field_type in (5, 10):
                # Ratios
                sep_idx = prop_val.find('/')
                if sep_idx > -1:
                    prop_val = Ratio(
                        parse_long(prop_val[0:sep_idx]),
                        parse_long(prop_val[sep_idx + 1:])
                    )
            elif field_options:
                # All those with pre-defined options are numeric, types 3, 4, or 7
                vals = prop_val.split(',')
                prop_val = ', '.join([field_options.get(parse_long(v), v) for v in vals])
            elif field_type == 7:
                # Custom encoding. Assume we have either a straight string or a
                # string of comma-separated byte codes, e.g. "ABC" or "65, 66, 67"
                if CHAR_LIST_REGEX.match(prop_val):
                    prop_val = _safe_string([parse_int(num) for num in prop_val.split(',')])
                else:
                    prop_val = _safe_string(prop_val)
            else:
                # Anything else we can just convert straight to a string
                pass

        # Return the string form of whatever we now have
        return str(prop_val).strip()

    except:
        # Fall back to returning the value as-is
        return str(prop_val).strip()
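EXIF field types 5 and 10 are rationals stored as "numerator/denominator" strings; Ratio and parse_long above are project helpers. A rough standard-library sketch of that branch:

from fractions import Fraction

def format_rational(value):
    """Render an EXIF rational string in reduced form (standalone sketch)."""
    num, _, den = value.partition('/')
    if den:
        return str(Fraction(int(num), int(den)))
    return value

# format_rational('72/1') -> '72', format_rational('1/200') -> '1/200'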
Example #7
def diagnostics_submit():
	required = set(('device', 'operating-system', 'udid', 'version', 'git-revision',
					'build-date-epoch', 'time-since-launch', 'preprocessor-avg-runtime'))
	if not required.issubset(set(request.form.keys())):
		abort(404)

	report = DiagnosticReport()
	report.submit_date = datetime.datetime.utcnow()
	report.device = request.form['device'].rstrip()
	report.system = request.form['operating-system'].rstrip()
	report.udid = request.form['udid'].rstrip()
	report.version = request.form['version'].rstrip()
	report.gitrev = request.form['git-revision'].rstrip()
	report.build_date = parse_date_epoch(request.form['build-date-epoch'])
	report.time_since_launch = parse_float(request.form['time-since-launch'])
	report.preprocessor_avg_runtime = parse_int(request.form['preprocessor-avg-runtime'])
	report.put()

	return ''
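parse_date_epoch, parse_float, and parse_int here are project helpers; presumably parse_date_epoch turns a Unix-timestamp form field into a datetime, roughly like this sketch (an assumption, not the project's code):

import datetime

def parse_date_epoch_sketch(value, default=None):
    """Convert an epoch-seconds string to a UTC datetime, or return `default`."""
    try:
        return datetime.datetime.utcfromtimestamp(float(value))
    except (TypeError, ValueError):
        return default

# parse_date_epoch_sketch('1700000000') -> datetime.datetime(2023, 11, 14, 22, 13, 20)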
Example #8
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source),
                  repr(target))

    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(
            fsource)
        logging.debug("    Magic number: %s", repr(magic))
        logging.debug("    Method: %s", repr(method))
        logging.debug("    Major version number: %d", majorversion)
        logging.debug("    Minor version number: %d", minorversion)
        logging.debug("    Page size: %d", pagesize)
        logging.debug("    Uncompressed size: %d", uncompressed_size)
        nointra, delta, inner = parse_method_name(method)
        fsource.flush()
        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource,
                                    mode="rb",
                                    compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource,
                                      mode="rb",
                                      compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource,
                                     stdout=ftmp,
                                     stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")
        try:
            reference = util.parse_string(fsource)
            logging.debug("Reference dump: %s", reference)

            # parse deduplicated pages
            fills = {}
            reference_list = list(util.parse_pagenr_list(fsource))
            for i in xrange(len(reference_list)):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = reference_list[i]

            # parse diffs
            if delta:
                diffs = {}
                pagenrs = list(util.parse_pagenr_list(fsource))
                for i in xrange(len(pagenrs)):
                    diffs[pagenrs[i]] = util.parse_diff(fsource, pagesize)

            # parse new pages
            newpages = {}
            newdistinct = set()
            if nointra:
                for left, right in list(util.parse_interval_list(fsource)):
                    for j in xrange(left, right + 1):
                        page = fsource.read(pagesize)
                        newdistinct.add(page)
                        newpages[j] = page
            else:
                newcnt = util.parse_int(fsource, 4)
                intervals = []
                for _ in xrange(newcnt):
                    intervals.append(list(util.parse_interval_list(fsource)))
                for i in xrange(newcnt):
                    page = fsource.read(pagesize)
                    for left, right in intervals[i]:
                        for j in xrange(left, right + 1):
                            newdistinct.add(page)
                            newpages[j] = page
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()

        # reconstruct file
        pagenr = 0
        final = uncompressed_size / pagesize
        same_distinct, same_total = set(), 0
        different_distinct, different_total = set(), 0
        seen = set()
        diff_seen = set()
        with open(reference, "rb") as freference:
            with open(target, "wb") as ftarget:
                while pagenr < final:
                    if pagenr in fills:
                        freference.seek(pagesize * fills[pagenr])
                        page = freference.read(pagesize)
                        seen.add(page)
                        different_distinct.add(page)
                        different_total += 1
                        ftarget.write(page)
                    elif delta and pagenr in diffs:
                        freference.seek(pagenr * pagesize)
                        page = freference.read(pagesize)
                        newpage = util.apply_diff(page, diffs[pagenr])
                        diff_seen.add(newpage)
                        ftarget.write(newpage)
                    elif pagenr in newpages:
                        seen.add(newpages[pagenr])
                        ftarget.write(newpages[pagenr])
                    else:
                        freference.seek(pagesize * pagenr)
                        page = freference.read(pagesize)
                        seen.add(page)
                        same_distinct.add(page)
                        same_total += 1
                        ftarget.write(page)
                    pagenr += 1

    # some info
    logging.debug("New pages: %d/%d (%d/%d)", len(newpages), final,
                  len(newdistinct), len(seen))
    logging.debug("Deduplicated pages at the same offset: %d/%d (%d/%d)",
                  same_total, final, len(same_distinct), len(seen))
    logging.debug("Deduplicated pages at different offsets: %d/%d (%d/%d)",
                  len(fills), final, len(different_distinct), len(seen))
    logging.debug("Deduplicated pages in total: %d/%d (%d/%d)",
                  same_total + len(fills), final,
                  len(same_distinct | different_distinct), len(seen))
    if delta:
        logging.debug("Diffed pages: %d/%d (%d/%d)", len(diffs), final,
                      len(diff_seen), len(seen))
    logging.debug("Done")

    return 0
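Note that util.parse_int(fsource, 4) in this example reads a fixed-width integer from a binary stream rather than parsing a string. A standalone Python 3 sketch of that idea (the byte order and signedness are assumptions; the real helper may differ):

def read_uint(stream, width):
    """Read `width` bytes from a binary stream as an unsigned little-endian integer."""
    data = stream.read(width)
    if len(data) != width:
        raise EOFError('truncated stream')
    return int.from_bytes(data, 'little')

# import io; read_uint(io.BytesIO(b'\x04\x00\x00\x00'), 4) -> 4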
Example #9
File: views.py Project: quru/qis
def image():
    logger.debug(request.method + ' ' + request.url)
    try:
        logged_in = session_logged_in()
        allow_uncache = app.config['BENCHMARKING'] or app.config['DEBUG']
        args = request.args

        # Get URL parameters for the image
        src         = args.get('src', '')
        page        = args.get('page', None)
        iformat     = args.get('format', None)
        template    = args.get('tmp', None)
        width       = args.get('width', None)
        height      = args.get('height', None)
        halign      = args.get('halign', None)
        valign      = args.get('valign', None)
        autosizefit = args.get('autosizefit', None)
        rotation    = args.get('angle', None)
        flip        = args.get('flip', None)
        top         = args.get('top', None)
        left        = args.get('left', None)
        bottom      = args.get('bottom', None)
        right       = args.get('right', None)
        autocropfit = args.get('autocropfit', None)
        fill        = args.get('fill', None)
        quality     = args.get('quality', None)
        sharpen     = args.get('sharpen', None)
        ov_src      = args.get('overlay', None)
        ov_size     = args.get('ovsize', None)
        ov_opacity  = args.get('ovopacity', None)
        ov_pos      = args.get('ovpos', None)
        icc_profile = args.get('icc', None)
        icc_intent  = args.get('intent', None)
        icc_bpc     = args.get('bpc', None)
        colorspace  = args.get('colorspace', None)
        strip       = args.get('strip', None)
        dpi         = args.get('dpi', None)
        tile        = args.get('tile', None)
        # Get URL parameters for handling options
        attach      = args.get('attach', None)
        xref        = args.get('xref', None)
        stats       = args.get('stats', None)
        # Get protected admin/internal parameters
        cache       = args.get('cache', '1') if logged_in or allow_uncache else '1'
        recache     = args.get('recache', None) if allow_uncache else None

        # eRez compatibility mode
        src = erez_params_compat(src)

        # Tweak strings as necessary and convert non-string parameters
        # to the correct data types
        try:
            # Image options
            if page is not None:
                page = parse_int(page)
            if iformat is not None:
                iformat = iformat.lower()
            if template is not None:
                template = template.lower()
            if width is not None:
                width = parse_int(width)
            if height is not None:
                height = parse_int(height)
            if halign is not None:
                halign = halign.lower()
            if valign is not None:
                valign = valign.lower()
            if autosizefit is not None:
                autosizefit = parse_boolean(autosizefit)
            if rotation is not None:
                rotation = parse_float(rotation)
            if flip is not None:
                flip = flip.lower()
            if top is not None:
                top = parse_float(top)
            if left is not None:
                left = parse_float(left)
            if bottom is not None:
                bottom = parse_float(bottom)
            if right is not None:
                right = parse_float(right)
            if autocropfit is not None:
                autocropfit = parse_boolean(autocropfit)
            if fill is not None:
                fill = parse_colour(fill)
            if quality is not None:
                quality = parse_int(quality)
            if sharpen is not None:
                sharpen = parse_int(sharpen)
            if ov_size is not None:
                ov_size = parse_float(ov_size)
            if ov_pos is not None:
                ov_pos = ov_pos.lower()
            if ov_opacity is not None:
                ov_opacity = parse_float(ov_opacity)
            if icc_profile is not None:
                icc_profile = icc_profile.lower()
            if icc_intent is not None:
                icc_intent = icc_intent.lower()
            if icc_bpc is not None:
                icc_bpc = parse_boolean(icc_bpc)
            if colorspace is not None:
                colorspace = colorspace.lower()
            if strip is not None:
                strip = parse_boolean(strip)
            if dpi is not None:
                dpi = parse_int(dpi)
            if tile is not None:
                tile = parse_tile_spec(tile)
            # Handling options
            if attach is not None:
                attach = parse_boolean(attach)
            if xref is not None:
                validate_string(xref, 0, 1024)
            if stats is not None:
                stats = parse_boolean(stats)
            # Admin/internal options
            if cache is not None:
                cache = parse_boolean(cache)
            if recache is not None:
                recache = parse_boolean(recache)
        except (ValueError, TypeError) as e:
            raise httpexc.BadRequest(unicode(e))

        # Package and validate the parameters
        try:
            # #2694 Enforce public image limits - perform easy parameter checks
            if not logged_in:
                width, height, autosizefit = _public_image_limits_pre_image_checks(
                    width, height, autosizefit, tile, template
                )
            # Store and normalise all the parameters
            image_attrs = ImageAttrs(src, -1, page, iformat, template,
                                     width, height, halign, valign,
                                     rotation, flip,
                                     top, left, bottom, right, autocropfit,
                                     autosizefit, fill, quality, sharpen,
                                     ov_src, ov_size, ov_pos, ov_opacity,
                                     icc_profile, icc_intent, icc_bpc,
                                     colorspace, strip, dpi, tile)
            image_engine.finalise_image_attrs(image_attrs)
        except ValueError as e:
            raise httpexc.BadRequest(unicode(e))

        # Get/create the database ID (from cache, validating path on create)
        image_id = data_engine.get_or_create_image_id(
            image_attrs.filename(),
            return_deleted=False,
            on_create=on_image_db_create_anon_history
        )
        if (image_id == 0):
            raise DoesNotExistError()  # Deleted
        elif (image_id < 0):
            raise DBError('Failed to add image to database')
        image_attrs.set_database_id(image_id)

        # Require view permission or file admin
        permissions_engine.ensure_folder_permitted(
            image_attrs.folder_path(),
            FolderPermission.ACCESS_VIEW,
            get_session_user()
        )
        # Ditto for overlays
        if ov_src:
            permissions_engine.ensure_folder_permitted(
                filepath_parent(ov_src),
                FolderPermission.ACCESS_VIEW,
                get_session_user()
            )

        # v1.17 If this is a conditional request with an ETag, see if we can just return a 304
        if 'If-None-Match' in request.headers and not recache:
            etag_valid, modified_time = _etag_is_valid(
                image_attrs,
                request.headers['If-None-Match'],
                False
            )
            if etag_valid:
                # Success HTTP 304
                return make_304_response(image_attrs, False, modified_time)

        # Get the requested image data
        image_wrapper = image_engine.get_image(
            image_attrs,
            'refresh' if recache else cache
        )
        if (image_wrapper is None):
            raise DoesNotExistError()

        # #2694 Enforce public image limits - check the dimensions
        #       of images that passed the initial parameter checks
        if not logged_in:
            try:
                _public_image_limits_post_image_checks(
                    image_attrs.width(),
                    image_attrs.height(),
                    image_attrs.template(),
                    image_wrapper.data(),
                    image_wrapper.attrs().format()
                )
            except ValueError as e:
                raise httpexc.BadRequest(unicode(e))  # As for the pre-check

        # Success HTTP 200
        return make_image_response(image_wrapper, False, stats, attach, xref)
    except httpexc.HTTPException:
        # Pass through HTTP 4xx and 5xx
        raise
    except ServerTooBusyError:
        logger.warn(u'503 Too busy for ' + request.url)
        raise httpexc.ServiceUnavailable()
    except ImageError as e:
        logger.warn(u'415 Invalid image file \'' + src + '\' : ' + unicode(e))
        raise httpexc.UnsupportedMediaType(unicode(e))
    except SecurityError as e:
        if app.config['DEBUG']:
            raise
        log_security_error(e, request)
        raise httpexc.Forbidden()
    except DoesNotExistError as e:
        # First time around the ID will be set. Next time around it
        # won't, but we should check whether the disk file now exists.
        if image_attrs.database_id() > 0 or path_exists(image_attrs.filename(), require_file=True):
            image_engine.reset_image(image_attrs)
        logger.warn(u'404 Not found: ' + unicode(e))
        raise httpexc.NotFound(unicode(e))
    except Exception as e:
        if app.config['DEBUG']:
            raise
        logger.error(u'500 Error for ' + request.url + '\n' + unicode(e))
        raise httpexc.InternalServerError(unicode(e))
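parse_boolean and the other parse_* calls above are project helpers that raise ValueError/TypeError on bad input (hence the BadRequest handling). A sketch of a query-parameter boolean parser in that spirit, consistent with the '1' default used for cache (an assumption; the real helper may accept different spellings):

def parse_boolean(value):
    """Parse a query-string boolean, raising ValueError on unknown values (sketch)."""
    text = str(value).strip().lower()
    if text in ('1', 'true', 'yes', 'on'):
        return True
    if text in ('0', 'false', 'no', 'off'):
        return False
    raise ValueError('not a boolean value: %r' % value)

# parse_boolean('1') -> True, parse_boolean('false') -> False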
Example #10
async def fetch_playlist(playlistId: str, chunk_size: int = 1024):
    """
    Fetch all metadata of a YouTube playlist.

    Performs multiple asynchronous HTTP requests to obtain all metadata and
    playlist items. Does not provide rich YouTube metadata: fields that are
    only included in browse_request queries made by modern JS YouTube clients.

    Result:
    {
        'id': string,
        'title': string,
        'description': string,
        'thumbnail': string,
        'length': integer,
        'views': integer,
        'uploader': {
            'name': string,
            'url': string
        },
        'items': [
            {
                'id': string,
                'title': string,
                'uploader': {
                    'name': string,
                    'url': string
                },
                'lengthSeconds': integer,
            }
        ]
    }
    """

    #
    # Initialize the result
    #

    playlist = {'id': playlistId, 'items': []}

    #
    # Build the headers for the ClientSession.
    #

    headers = get_default_headers()
    # Only accept HTML.
    headers['Accept'] = 'text/html'

    #
    # Retrieve landing page.
    #

    # Open an auto-raising ClientSession with default cookie handling.
    async with ClientSession(headers=headers,
                             raise_for_status=True) as session:
        # Step 1: Get the initial landing page.
        async with session.get('https://www.youtube.com/playlist',
                               params={'list': playlistId}) as response:
            # Assert that this really worked the way we wanted.
            assert response.status == 200
            assert response.content_type == 'text/html'
            encoding = response.get_encoding()
            assert is_valid_encoding(encoding)

            # Retrieve the '//div[@id=""]' node.
            is_content = lambda x: (x.tag, x.get('id')) == ('div', '')
            parser = etree.HTMLPullParser(events=('start', 'end'))
            content, discard = None, True
            while content is None and not response.content.at_eof():
                # Feed the parser the next chunk of data.
                parser.feed(
                    (await response.content.read(chunk_size)).decode(encoding))
                for event, node in parser.read_events():
                    if event == 'start':
                        if is_content(node):
                            # Content node reached, stop discarding.
                            discard = False
                        continue

                    if is_content(node):
                        # Content node finished, exit.
                        content = node
                        break

                    if discard:
                        # Discard everything before this point.
                        node.clear()
                        for ancestor in node.xpath('ancestor-or-self::*'):
                            while ancestor.getprevious() is not None:
                                del ancestor.getparent()[0]

        #
        # Parse the playlist header.
        #

        pl_header = content[0]
        assert pl_header.get('id') == 'pl-header'

        # Get the thumbnail.
        pl_thumb = pl_header[0][0]
        assert pl_thumb.tag == 'img'
        playlist['thumbnail'] = pl_thumb.get('src')

        # Get the title.
        pl_title = pl_header[1][0]
        assert pl_title.tag == 'h1'
        playlist['title'] = pl_title.text.strip()

        # Get the uploader.
        pl_uploader = pl_header[1][1][0][0]
        assert pl_uploader.tag == 'a'
        playlist['uploader'] = {
            'name': pl_uploader.text,
            'url': 'https://www.youtube.com' + pl_uploader.get('href')
        }

        # Get the length.
        pl_length = pl_header[1][1][1]
        assert pl_length.tag == 'li'
        playlist['length'] = parse_int(pl_length.text, aggressive=True)

        # Get the view count.
        pl_views = pl_header[1][1][2]
        assert pl_views.tag == 'li'
        playlist['views'] = parse_int(pl_views.text, aggressive=True)

        # Get the description.
        pl_description = pl_header[1][2][0]
        assert pl_description.tag == 'span'
        playlist['description'] = pl_description.text.strip()

        #
        # Parse the playlist items.
        #

        def parse_item(node):
            item = {'id': node.get('data-video-id')}

            # Get the video thumbnail.
            vid_thumb = node[2][0][0][0][0][0][0]
            assert vid_thumb.tag == 'img'
            item['thumbnail'] = vid_thumb.get('data-thumb')

            # Get video title.
            vid_title = node[3][0]
            assert vid_title.tag == 'a'
            item['title'] = vid_title.text.strip()

            # Get video uploader.
            vid_uploader = node[3][1][0]
            assert vid_uploader.tag == 'a'
            item['uploader'] = {
                'name': vid_uploader.text,
                'url': 'https://www.youtube.com' + vid_uploader.get('href')
            }

            # Get video length.
            vid_length = node[6][0][0][0]
            assert vid_length.tag == 'span'
            item['lengthSeconds'] = parse_ts(vid_length.text)

            return item

        pl_items = content[1][0][0][0][0]
        assert pl_items.get('id') == 'pl-load-more-destination'

        playlist['items'] += map(parse_item, pl_items)

        #
        # Fetch and parse all continuations.
        #

        load_more = index_s(content, 1, 0, 0, 1)
        assert load_more.tag == 'button'
        load_more = load_more.get(
            'data-uix-load-more-href') if load_more is not None else None
        while load_more is not None:
            # Request the continuation contents.
            async with session.get('https://www.youtube.com' + load_more,
                                   headers={'Accept':
                                            'application/json'}) as response:
                # Assert that this really worked the way we wanted.
                assert response.status == 200
                assert response.content_type == 'application/json'
                encoding = response.get_encoding()
                assert is_valid_encoding(encoding)

                # Parse the result data (large!)
                # TODO: This ought to be streamed as well.
                data = await response.json()
                assert 'content_html' in data
                assert 'load_more_widget_html' in data

                # Parse all new items.
                parser = etree.HTMLPullParser(events=('end', ))
                parser.feed(data['content_html'])
                for _, node in parser.read_events():
                    if node.tag == 'tr':
                        playlist['items'] += [parse_item(node)]

                        # Discard everything before this point.
                        node.clear()
                        for ancestor in node.xpath('ancestor-or-self::*'):
                            while ancestor.getprevious() is not None:
                                del ancestor.getparent()[0]

                # Extract the next continuation link
                match = re.search(r'data-uix-load-more-href=\"(.+?)\"',
                                  data['load_more_widget_html'])
                if match:
                    # Next continuation link.
                    load_more = match.group(1)
                else:
                    # No more continuations.
                    load_more = None

    return playlist
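parse_int(..., aggressive=True) is a project helper; from the context (playlist length and view-count text such as "1,234 views") it presumably discards any non-digit characters before converting. A standalone sketch of that assumption:

import re

def parse_int_aggressive(text, default=0):
    """Strip non-digits from `text` and convert the remainder to int (sketch)."""
    digits = re.sub(r'[^0-9]', '', text or '')
    return int(digits) if digits else default

# parse_int_aggressive('1,234,567 views') -> 1234567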
Example #11
def decompress(source, target):
    # some info
    logging.debug("Starting decompression of %s to %s", repr(source),
                  repr(target))

    with open(source, "rb") as fsource:
        # some info
        logging.debug("Parsing header")
        magic, method, majorversion, minorversion, pagesize, uncompressed_size = util.parse_header(
            fsource)
        logging.debug("    Magic number: %s", repr(magic))
        logging.debug("    Method: %s", repr(method))
        logging.debug("    Major version number: %d", majorversion)
        logging.debug("    Minor version number: %d", minorversion)
        logging.debug("    Page size: %d", pagesize)
        logging.debug("    Uncompressed size: %d", uncompressed_size)
        inner = method.split("intradedup")[1]
        if not method.startswith("intradedup") or inner not in ("", "gzip",
                                                                "bzip2",
                                                                "7zip"):
            logging.error("Invalid method %s", repr(method))
            return -1

        fsource.flush()
        tmphandle, tmpfile = None, None
        if inner == "gzip":
            fsource = gzip.GzipFile(fileobj=fsource,
                                    mode="rb",
                                    compresslevel=9)
        elif inner == "bzip2":
            fsource = bz2file.BZ2File(filename=fsource,
                                      mode="rb",
                                      compresslevel=9)
        elif inner == "7zip":
            util.create_dir(".tmp")
            tmphandle, tmpfile = tempfile.mkstemp(dir=".tmp")
            with open(tmpfile, "wb") as ftmp:
                p = subprocess.Popen(["7za", "x", "-txz", "-si", "-so"],
                                     stdin=fsource,
                                     stdout=ftmp,
                                     stderr=subprocess.PIPE)
                p.communicate()
            fsource = open(tmpfile, "rb")
        try:
            # parse dictionary
            distinct = util.parse_int(fsource, 4)
            fills = {}
            pagelist = []
            for _ in xrange(distinct):
                page = fsource.read(pagesize)
                pagelist.append(page)
            for i in xrange(distinct):
                for left, right in util.parse_interval_list(fsource):
                    for pagenr in xrange(left, right + 1):
                        fills[pagenr] = pagelist[i]

            # reconstruct file
            pagenr = 0
            seen = set()
            with open(target, "wb") as ftarget:
                while True:
                    if pagenr in fills:
                        ftarget.write(fills[pagenr])
                        seen.add(fills[pagenr])
                    else:
                        page = fsource.read(pagesize)
                        seen.add(page)
                        if not page:
                            pagenr += 1
                            break
                        ftarget.write(page)
                    pagenr += 1
                while pagenr in fills:
                    ftarget.write(fills[pagenr])
                    seen.add(fills[pagenr])
                    pagenr += 1
            logging.debug("Deduplicated pages: %d/%d (%d/%d)", len(fills),
                          uncompressed_size / pagesize, distinct, len(seen))
        finally:
            if tmphandle is not None:
                os.close(tmphandle)
                os.remove(tmpfile)
            if inner is not None:
                fsource.close()
    logging.debug("Done")

    return 0