Example #1
    def make_delta(self, history_chain, old_file, new_file, out_file_name):
        """ Make a new binary change blob and write it into out_file_name. """
        if len(history_chain) == 0:
            # No history to delta against, so store the full file.
            return self.make_full_insert(new_file, out_file_name)

        with open(new_file, 'rb') as in_file:
            raw_new = in_file.read()
        with open(old_file, 'rb') as in_old:
            raw_old = in_old.read()

        # compress() returns a (header, payload) pair; the header may be empty.
        values = compress(mdiff.textdiff(raw_old, raw_new))
        parent = history_chain[0][0]
        with open(out_file_name, 'wb') as out_file:
            if values[0]:
                out_file.write(values[0])
            out_file.write(values[1])

        return parent
Example #2
def unicode_make_patch(old_text, new_text):
    """ Helper wrapper around make_patch() which takes unicode strings."""
    values = compress(mdiff.textdiff(old_text.encode('utf8'),
                                     new_text.encode('utf8')))
    if values[0]:
        return ''.join(values)

    return values[1]
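
The compress()/textdiff() pairing above yields an opaque binary delta, not a
human-readable diff. As a minimal round-trip sketch (assuming a stock
Mercurial install and leaving compress() aside), mdiff.patch() re-applies
such a delta to the old text:

from mercurial import mdiff

old = b'line one\nline two\n'
new = b'line one\nline 2\n'
delta = mdiff.textdiff(old, new)       # opaque binary delta
assert mdiff.patch(old, delta) == new  # patch() inverts textdiff()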
Example #3
    def revdiff(self, rev1, rev2):
        """return or calculate a delta between two revisions"""
        if rev1 > self.repotiprev and rev2 > self.repotiprev:
            return self.revlog2.revdiff(
                self.revlog2.rev(self.node(rev1)),
                self.revlog2.rev(self.node(rev2)))
        elif rev1 <= self.repotiprev and rev2 <= self.repotiprev:
            return revlog.revlog.revdiff(self, rev1, rev2)

        return mdiff.textdiff(self.revision(self.node(rev1)),
                              self.revision(self.node(rev2)))
Example #4
def fastdelta(mf, mfgetter, base, changes):
    """Given a base manifest text as an array.array and a list of changes
    relative to that text, compute a delta that can be used by revlog.
    """
    delta = []
    dstart = None
    dend = None
    dline = [""]
    start = 0
    # zero copy representation of base as a buffer
    addbuf = util.buffer(base)

    changes = list(changes)
    if len(changes) < 1000:
        # start with a readonly loop that finds the offset of
        # each line and creates the deltas
        for f, todelete in changes:
            # start and end bound the existing entry, or both mark the
            # insert point if the entry is absent
            start, end = manifest._msearch(addbuf, f, start)
            if not todelete:
                h, fl = mfgetter(f)
                l = "%s\0%s%s\n" % (f, revlog.hex(h), fl)
            else:
                if start == end:
                    # item we want to delete was not found, error out
                    raise AssertionError(
                        "failed to remove %s from manifest" % f)
                l = ""
            if dstart is not None and dstart <= start and dend >= start:
                if dend < end:
                    dend = end
                if l:
                    dline.append(l)
            else:
                if dstart is not None:
                    delta.append([dstart, dend, "".join(dline)])
                dstart = start
                dend = end
                dline = [l]

        if dstart is not None:
            delta.append([dstart, dend, "".join(dline)])
        # apply the delta to the base, and get a delta for addrevision
        deltatext, arraytext = manifest._addlistdelta(base, delta)
    else:
        # For large changes, it's much cheaper to just build the text and
        # diff it.
        arraytext = bytearray(mf.text())
        deltatext = mdiff.textdiff(util.buffer(base), util.buffer(arraytext))

    return arraytext, deltatext
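
fastdelta() accumulates its edits as [start, end, text] triples against the
base buffer before handing them to manifest._addlistdelta(). A minimal sketch
of how such triples splice into the base (apply_triples is a hypothetical
helper for illustration, not a Mercurial API):

def apply_triples(base, triples):
    out, pos = [], 0
    for start, end, text in triples:
        out.append(base[pos:start])  # copy the unchanged span
        out.append(text)             # splice in the replacement
        pos = end
    out.append(base[pos:])
    return b''.join(out)

base = b'a\x00hash1\nb\x00hash2\nc\x00hash3\n'
# Replace the "b" entry (bytes 8..16) with an updated line.
patched = apply_triples(base, [(8, 16, b'b\x00hash9\n')])
assert patched == b'a\x00hash1\nb\x00hash9\nc\x00hash3\n'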
Example #5
def make_patch(old_text, new_text):
    """ Return a raw patch bytes which transforms old_text into new_text. """
    values = compress(mdiff.textdiff(old_text, new_text))
    if values[0]:
        return ''.join(values)
    return values[1]
Example #6
 def diff(self, delta_object):
     def flatten(s):
         return s if isinstance(s, str) else str(s)
     return textdiff(flatten(delta_object.raw_data) if delta_object else '',
                     flatten(self.raw_data))
Example #7
 def revdiff(self, node1, node2):
     return mdiff.textdiff(self.revision(node1, raw=True),
                           self.revision(node2, raw=True))
Example #8
 def diff(self, other):
     return mdiff.textdiff(other.data if other else '', self.data)
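
Here, as in Example #6, the delta base falls back to an empty string when
there is no parent, which makes the delta a de facto full insert. A quick
sketch of that property (assuming a stock Mercurial install):

from mercurial import mdiff

data = b'hello\n'
delta = mdiff.textdiff(b'', data)       # delta against an empty base
assert mdiff.patch(b'', delta) == data  # applying it reproduces the text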
Example #9
    def repackdata(self, ledger, target):
        ui = self.repo.ui
        maxchainlen = ui.configint('packs', 'maxchainlen', 1000)

        byfile = {}
        for entry in ledger.entries.itervalues():
            if entry.datasource:
                byfile.setdefault(entry.filename, {})[entry.node] = entry

        count = 0
        for filename, entries in sorted(byfile.iteritems()):
            ui.progress(_("repacking data"), count, unit=self.unit,
                        total=len(byfile))

            ancestors = {}
            nodes = list(entries.iterkeys())
            nohistory = []
            for i, node in enumerate(nodes):
                if node in ancestors:
                    continue
                ui.progress(_("building history"), i, unit='nodes',
                            total=len(nodes))
                try:
                    ancestors.update(self.fullhistory.getancestors(filename,
                        node, known=ancestors))
                except KeyError:
                    # Since we're packing data entries, we may not have the
                    # corresponding history entries for them. It's not a big
                    # deal, but the entries won't be delta'd perfectly.
                    nohistory.append(node)
            ui.progress(_("building history"), None)

            # Order the nodes children first, so we can produce reverse deltas
            orderednodes = list(reversed(self._toposort(ancestors)))
            if len(nohistory) > 0:
                ui.debug('repackdata: %d nodes without history\n' %
                         len(nohistory))
            orderednodes.extend(sorted(nohistory))

            # Filter orderednodes to just the nodes we want to serialize (it
            # currently also has the edge nodes' ancestors).
            orderednodes = filter(lambda node: node in nodes, orderednodes)

            # Garbage collect old nodes:
            if self.garbagecollect:
                neworderednodes = []
                for node in orderednodes:
                    # If the node is old and is not in the keepset, we skip it,
                    # and mark as garbage collected
                    if ((filename, node) not in self.keepkeys and
                        self.isold(self.repo, filename, node)):
                        entries[node].gced = True
                        continue
                    neworderednodes.append(node)
                orderednodes = neworderednodes

            # Compute delta bases for nodes:
            deltabases = {}
            nobase = set()
            referenced = set()
            nodes = set(nodes)
            for i, node in enumerate(orderednodes):
                ui.progress(_("processing nodes"), i, unit='nodes',
                            total=len(orderednodes))
                # Find delta base
                # TODO: allow delta'ing against most recent descendant instead
                # of immediate child
                deltatuple = deltabases.get(node, None)
                if deltatuple is None:
                    deltabase, chainlen = nullid, 0
                    deltabases[node] = (nullid, 0)
                    nobase.add(node)
                else:
                    deltabase, chainlen = deltatuple
                    referenced.add(deltabase)

                # Use available ancestor information to inform our delta choices
                ancestorinfo = ancestors.get(node)
                if ancestorinfo:
                    p1, p2, linknode, copyfrom = ancestorinfo

                    # The presence of copyfrom means we're at a point where the
                    # file was copied from elsewhere. So don't attempt to do any
                    # deltas with the other file.
                    if copyfrom:
                        p1 = nullid

                    if chainlen < maxchainlen:
                        # Record this child as the delta base for its parents.
                        # This may be non-optimal, since the parents may have
                        # many children, and this will only choose the last one.
                        # TODO: record all children and try all deltas to find
                        # best
                        if p1 != nullid:
                            deltabases[p1] = (node, chainlen + 1)
                        if p2 != nullid:
                            deltabases[p2] = (node, chainlen + 1)

            # experimental config: repack.chainorphansbysize
            if ui.configbool('repack', 'chainorphansbysize', True):
                orphans = nobase - referenced
                orderednodes = self._chainorphans(ui, filename, orderednodes,
                    orphans, deltabases)

            # Compute deltas and write to the pack
            for i, node in enumerate(orderednodes):
                deltabase, chainlen = deltabases[node]
                # Compute delta
                # TODO: Optimize the deltachain fetching. Since we're
                # iterating over the different version of the file, we may
                # be fetching the same deltachain over and over again.
                meta = None
                if deltabase != nullid:
                    deltaentry = self.data.getdelta(filename, node)
                    delta, deltabasename, origdeltabase, meta = deltaentry
                    size = meta.get(constants.METAKEYSIZE)
                    if (deltabasename != filename or origdeltabase != deltabase
                        or size is None):
                        deltabasetext = self.data.get(filename, deltabase)
                        original = self.data.get(filename, node)
                        size = len(original)
                        delta = mdiff.textdiff(deltabasetext, original)
                else:
                    delta = self.data.get(filename, node)
                    size = len(delta)
                    meta = self.data.getmeta(filename, node)

                # TODO: don't use the delta if it's larger than the fulltext
                if constants.METAKEYSIZE not in meta:
                    meta[constants.METAKEYSIZE] = size
                target.add(filename, node, deltabase, delta, meta)

                entries[node].datarepacked = True

            ui.progress(_("processing nodes"), None)
            count += 1

        ui.progress(_("repacking data"), None)
        target.close(ledger=ledger)
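
repackdata() orders nodes children-first so that parents can be stored as
reverse deltas against their children, keeping recent revisions cheap to
reconstruct. A compact sketch of that layout (illustrative data only;
assumes a stock Mercurial install):

from mercurial import mdiff

revisions = [b'v1\n', b'v1\nv2\n', b'v1\nv2\nv3\n']  # oldest to newest
newest_first = list(reversed(revisions))
full = newest_first[0]                               # newest stored whole
deltas = [mdiff.textdiff(child, parent)              # parent as reverse delta
          for child, parent in zip(newest_first, newest_first[1:])]

# Rebuilding the oldest revision walks the chain from the full text.
text = full
for delta in deltas:
    text = mdiff.patch(text, delta)
assert text == revisions[0]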
Example #10
 def revdiff(self, node1, node2):
     return mdiff.textdiff(self.revision(node1),
                           self.revision(node2))