Beispiel #1
0
 def exportFile(self, oid, f=None):
     """Export the object graph rooted at ``oid`` to ``f`` in ZEXP format.

     ``f`` may be None (a TemporaryFile is created), a filename string,
     or an open binary file object.  Returns the file object, positioned
     after the export-end marker.
     """
     if f is None:
         f = TemporaryFile()
     elif isinstance(f, str):
         f = open(f, 'w+b')
     f.write('ZEXP')
     oids = [oid]          # breadth-first queue of oids still to dump
     done_oids = {}        # oids already written (used as a set)
     # NOTE: the dead, Python-2-only binding 'done = done_oids.has_key'
     # was removed; the loop below tests membership with 'in'.
     load = self._storage.load
     while oids:
         oid = oids.pop(0)
         if oid in done_oids:
             continue
         done_oids[oid] = True
         try:
             p, serial = load(oid, self._version)
         except:
             # Deliberate best-effort: a broken reference is logged and
             # skipped so the rest of the graph can still be exported.
             logger.debug("broken reference for oid %s", repr(oid),
                          exc_info=True)
         else:
             referencesf(p, oids)  # enqueue outgoing references
             f.writelines([oid, p64(len(p)), p])
     f.write(export_end_marker)
     return f
Beispiel #2
0
 def exportFile(self, oid, f=None):
     """Write a ZEXP export of the object graph rooted at ``oid``.

     ``f`` is None (a TemporaryFile is used), a filename string, or a
     writable binary file; the file object is returned.
     """
     if f is None:
         f = TemporaryFile()
     elif isinstance(f, str):
         f = open(f, 'w+b')
     f.write('ZEXP')
     oids = [oid]
     done_oids = {}  # set of already-exported oids
     # The Python-2-only 'done = done_oids.has_key' binding was dead
     # code (the loop tests membership with 'in') and was removed.
     load = self._storage.load
     while oids:
         oid = oids.pop(0)
         if oid in done_oids:
             continue
         done_oids[oid] = True
         try:
             p, serial = load(oid, self._version)
         except:
             # best-effort: log broken references and keep exporting
             logger.debug("broken reference for oid %s",
                          repr(oid),
                          exc_info=True)
         else:
             referencesf(p, oids)  # enqueue outgoing references
             f.writelines([oid, p64(len(p)), p])
     f.write(export_end_marker)
     return f
Beispiel #3
0
def referrers(storage):
    """Map each referenced oid to the list of (oid, tid) records that
    refer to it, scanning every record of every transaction."""
    referrer_map = {}
    for txn in storage.iterator():
        for rec in txn:
            source = (rec.oid, rec.tid)
            for target in referencesf(rec.data):
                referrer_map.setdefault(target, []).append(source)
    return referrer_map
def referrers(storage):
    """Return {referenced oid: [(referring oid, tid), ...]} for *storage*."""
    result = {}
    for transaction in storage.iterator():
        for record in transaction:
            entry = (record.oid, record.tid)
            for oid in referencesf(record.data):
                if oid not in result:
                    result[oid] = []
                result[oid].append(entry)
    return result
Beispiel #5
0
def buildRefmap(fs):
    '''build a refmap from a filestorage. look in every record of every
       transaction. build a dict of oid -> list((referenced oids, mod.klass))
    '''
    refmap = {}
    fsi = fs.iterator()
    size = os.stat(fs.__name__).st_size
    start = time.time()
    lastPercent = 0.0
    interval = 0.005
    print "[1/3] Computing the Reference Map"
    for txn in fsi:
        percent = float(fsi._file.tell())/float(size) * 100

        if(percent - lastPercent > interval):
            spentTime = time.time() - start
            remainingTime = spentTime / float(fsi._file.tell()) * (float(size)) - spentTime
            sys.stdout.write("\r%f%% complete, time spent %s,  remaining time: %s" % (percent,GetInHMS(time.time() - start, True),  GetInHMS(remainingTime, False)))
            lastPercent = percent
        for rec in txn:
            if rec.data:
                mod, klass = get_pickle_metadata(rec.data)
            refs = referencesf(rec.data)
            refmap[rec.oid] = (refs, mod+"."+klass)
    print "\n",
    return refmap
Beispiel #6
0
def get_references(state):
    """Return the set of OIDs the given state refers to."""
    if not state:
        # empty/None state has no outgoing references
        return set()
    return {u64(oid) for oid in referencesf(str(state))}
Beispiel #7
0
def buildRefmap(fs):
    '''build a refmap from a filestorage. look in every record of every
       transaction. build a dict of oid -> list((referenced oids, mod.klass))
    '''
    refmap = {}
    fsi = fs.iterator()
    size = os.stat(fs.__name__).st_size
    start = time.time()
    lastPercent = 0.0
    interval = 0.005
    print "[1/3] Computing the Reference Map"
    for txn in fsi:
        percent = float(fsi._file.tell()) / float(size) * 100

        if (percent - lastPercent > interval):
            spentTime = time.time() - start
            remainingTime = spentTime / float(
                fsi._file.tell()) * (float(size)) - spentTime
            sys.stdout.write(
                "\r%f%% complete, time spent %s,  remaining time: %s" %
                (percent, GetInHMS(time.time() - start,
                                   True), GetInHMS(remainingTime, False)))
            lastPercent = percent
        for rec in txn:
            if rec.data:
                mod, klass = get_pickle_metadata(rec.data)
            refs = referencesf(rec.data)
            refmap[rec.oid] = (refs, mod + "." + klass)
    print "\n",
    return refmap
    def pack(self, t, referencesf):
        """Mark-and-sweep pack: keep every object reachable from the
        root oid and garbage-collect everything else.  The pack time
        *t* is unused by this in-memory implementation."""
        with self._lock:
            reachable = {}
            pending = ['\0\0\0\0\0\0\0\0']  # start from the root oid

            # Mark phase: follow outgoing references from the root.
            while pending:
                oid = pending.pop()
                if oid in reachable:
                    continue
                referencesf(self._opickle[oid], pending)
                reachable[oid] = None

            # Sweep phase: drop every stored oid we never reached.
            for oid in self._index.keys():
                if oid not in reachable:
                    self._takeOutGarbage(oid)
    def pack(self, t, referencesf):
        """Garbage-collect unreferenced objects.

        Everything transitively reachable from the root oid is kept;
        all other oids in the index are handed to _takeOutGarbage.
        The pack time *t* is ignored by this storage.
        """
        with self._lock:
            marked = {}
            stack = ['\0\0\0\0\0\0\0\0']  # root oid seeds the traversal
            while stack:
                current = stack.pop()
                if current in marked:
                    continue
                pickle_data = self._opickle[current]
                referencesf(pickle_data, stack)  # push outgoing refs
                marked[current] = None

            # Anything stored but never marked is garbage.
            for stored_oid in self._index.keys():
                if stored_oid not in marked:
                    self._takeOutGarbage(stored_oid)
Beispiel #10
0
 def findrefs(self, pos):
     """Return a list of oids referenced as of packtime.

     Follows backpointers from the data record at *pos* until the
     record that actually holds the pickle is found.
     """
     header = self._read_data_header(pos)
     while header.back:
         header = self._read_data_header(header.back)
     if not header.plen:
         return []  # no pickle data: nothing can be referenced
     return referencesf(self._file.read(header.plen))
Beispiel #11
0
 def findrefs(self, pos):
     """Return a list of oids referenced as of packtime."""
     # Chase backpointers until we reach the record holding the pickle.
     dh = self._read_data_header(pos)
     while dh.back != 0:
         dh = self._read_data_header(dh.back)
     # A zero pickle length means there is nothing to scan for refs.
     return referencesf(self._file.read(dh.plen)) if dh.plen else []
Beispiel #12
0
    def analyzeRecords(self, connection, records):
        cursor = connection.cursor()
        for record in records:
            current_oid = u64(record.oid)
            referred_oids = set(map(u64, referencesf(record.data)))

            for referred_oid in referred_oids or [-1]:
                cursor.execute("""
INSERT INTO links (source_oid, target_oid) VALUES
(?, ?)
            """, (current_oid, referred_oid))
        connection.commit()
Beispiel #13
0
    def pack(self, t, referencesf):
        """Mark-and-sweep pack of the in-memory storage.

        Objects transitively reachable from the root oid are kept;
        every other oid in the index is garbage-collected.  The pack
        time *t* is unused here.
        """
        self._lock_acquire()
        try:
            rindex = {}
            rootl = ['\0\0\0\0\0\0\0\0']

            # mark referenced objects
            while rootl:
                oid = rootl.pop()
                # Membership test replaces the Python-2-only
                # dict.has_key (same behavior, Python-3 compatible).
                if oid in rindex:
                    continue
                p = self._opickle[oid]
                referencesf(p, rootl)
                rindex[oid] = None

            # sweep unreferenced objects
            for oid in self._index.keys():
                if oid not in rindex:
                    self._takeOutGarbage(oid)
        finally:
            self._lock_release()
Beispiel #14
0
    def pack(self, t, referencesf):
        """Pack the storage: mark everything reachable from the root
        oid, then sweep (garbage-collect) every unreachable oid.
        The pack time *t* is ignored by this implementation.
        """
        self._lock_acquire()
        try:
            rindex = {}
            rootl = ['\0\0\0\0\0\0\0\0']

            # mark referenced objects
            while rootl:
                oid = rootl.pop()
                # 'in' replaces the Python-2-only rindex.has_key binding.
                if oid in rindex:
                    continue
                p = self._opickle[oid]
                referencesf(p, rootl)
                rindex[oid] = None

            # sweep unreferenced objects
            for oid in self._index.keys():
                if oid not in rindex:
                    self._takeOutGarbage(oid)
        finally:
            self._lock_release()
Beispiel #15
0
    def exportFile(self, oid, f=None):
        """Export the object graph rooted at *oid* in ZEXP format.

        *f* may be None (a TemporaryFile is used), a filename, or an
        open binary file.  When the underlying storage supports blobs,
        each blob's data is appended after its record.  Returns the
        file object.
        """
        if f is None:
            f = TemporaryFile(prefix="EXP")
        elif isinstance(f, six.string_types):
            f = open(f, 'w+b')
        f.write(b'ZEXP')
        oids = [oid]        # queue of oids still to export
        done_oids = {}      # already-exported oids (used as a set)
        # NOTE: the dead 'done = done_oids.__contains__' binding was
        # removed; the loop below tests membership with 'in'.
        load = self._storage.load
        supports_blobs = IBlobStorage.providedBy(self._storage)
        while oids:
            oid = oids.pop(0)
            if oid in done_oids:
                continue
            done_oids[oid] = True
            try:
                p, serial = load(oid)
            except:
                # Best-effort: log and skip broken references so the
                # rest of the graph still gets exported.
                logger.debug("broken reference for oid %s",
                             repr(oid),
                             exc_info=True)
            else:
                referencesf(p, oids)
                f.writelines([oid, p64(len(p)), p])

                if supports_blobs:
                    if not isinstance(self._reader.getGhost(p), Blob):
                        continue  # not a blob

                    blobfilename = self._storage.loadBlob(oid, serial)
                    f.write(blob_begin_marker)
                    f.write(p64(os.stat(blobfilename).st_size))
                    # 'with' guarantees the blob file is closed even if
                    # the copy fails (the original leaked it on error).
                    with open(blobfilename, "rb") as blobdata:
                        cp(blobdata, f)

        f.write(export_end_marker)
        return f
Beispiel #16
0
    def exportFile(self, oid, f=None):
        """Write a ZEXP export of the graph rooted at *oid* to *f*.

        *f* is None (TemporaryFile), a filename, or a binary file
        object; blob payloads are interleaved when the storage provides
        IBlobStorage.  The file object is returned.
        """
        if f is None:
            f = TemporaryFile(prefix="EXP")
        elif isinstance(f, six.string_types):
            f = open(f,'w+b')
        f.write(b'ZEXP')
        oids = [oid]
        done_oids = {}  # set of already-exported oids
        # Removed the unused 'done = done_oids.__contains__' binding --
        # membership is tested with 'in' below.
        load = self._storage.load
        supports_blobs = IBlobStorage.providedBy(self._storage)
        while oids:
            oid = oids.pop(0)
            if oid in done_oids:
                continue
            done_oids[oid] = True
            try:
                p, serial = load(oid)
            except:
                # deliberate best-effort: skip broken references
                logger.debug("broken reference for oid %s", repr(oid),
                             exc_info=True)
            else:
                referencesf(p, oids)
                f.writelines([oid, p64(len(p)), p])

                if supports_blobs:
                    if not isinstance(self._reader.getGhost(p), Blob):
                        continue # not a blob

                    blobfilename = self._storage.loadBlob(oid, serial)
                    f.write(blob_begin_marker)
                    f.write(p64(os.stat(blobfilename).st_size))
                    # Close the blob file even if the copy raises
                    # (previously leaked on error).
                    with open(blobfilename, "rb") as blobdata:
                        cp(blobdata, f)

        f.write(export_end_marker)
        return f
Beispiel #17
0
 def _total_size(oid, seen):
     """Return the total pickle size of *oid* plus everything it
     transitively references, memoized in the enclosing ``cache``."""
     cached = cache.get(oid)
     if cached is not None:
         return cached
     data, serialno = load_current(fs, oid)
     total = len(data)
     for child in referencesf(data):
         if child not in seen:
             seen[child] = 1
             total += _total_size(child, seen)
     cache[oid] = total
     # Bound the memo table: drop an arbitrary entry once it is full.
     if len(cache) == cache_size:
         cache.popitem()
     return total
Beispiel #18
0
def check_any_storage(storage):
    checked = set()
    to_check = deque([p64(0)])

    while to_check:
        check_oid = to_check.popleft()
        checked.add(check_oid)
        try:
            state, tid = storage.load(check_oid)
        except POSKeyError:
            print "Bad reference found", u64(check_oid)

        for reference in referencesf(state):
            if reference not in checked:
                to_check.append(reference)
Beispiel #19
0
 def _total_size(oid, seen):
     """Memoized total size (in bytes) of *oid* and its transitive
     references; *seen* guards against cycles and double counting."""
     hit = cache.get(oid)
     if hit is not None:
         return hit
     data, serialno = load_current(fs, oid)
     accumulated = len(data)
     for referenced in referencesf(data):
         if referenced not in seen:
             seen[referenced] = 1
             accumulated += _total_size(referenced, seen)
     cache[oid] = accumulated
     if len(cache) == cache_size:
         cache.popitem()  # keep the memo table bounded
     return accumulated
Beispiel #20
0
def main(path):
    """Report, for each interesting oid in the FileStorage at *path*,
    its record size, the total size of everything reachable from it,
    its path (when found by find_paths), and its module.class name."""
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()

    db = ZODB.DB(fs)
    rt = db.open().root()
    # Map oid -> human-readable path for objects within 3 hops of root.
    paths = find_paths(rt, 3)

    def total_size(oid):
        # Bounded memo table shared by the recursive helper below.
        cache = {}
        cache_size = 1000

        def _total_size(oid, seen):
            # Memoized size of oid plus all objects reachable from it;
            # 'seen' prevents cycles and double counting.
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = load_current(fs, oid)
            size = len(data)
            for suboid in referencesf(data):
                if suboid in seen:
                    continue
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            if len(cache) == cache_size:
                cache.popitem()
            return size

        return _total_size(oid, {})

    # NOTE(review): Python-2 idioms below -- .keys() sorted in place and
    # dict.has_key -- this function will not run unmodified on Python 3
    # (dict views have no .sort(); has_key was removed).  Confirm the
    # target interpreter before porting.
    keys = fs._index.keys()
    keys.sort()
    keys.reverse()

    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)

    fmt = "%8s %5d %8d %s %s.%s"

    for oid in keys:
        data, serialno = load_current(fs, oid)
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)
        path = paths.get(oid, '-')
        print(fmt % (U64(oid), len(data), total_size(oid), path, mod, klass))
Beispiel #21
0
def check_relstorage(connection):
    curr_objs = connection.cursor()
    cursor = connection.cursor()
    curr_objs.execute('select zoid, tid from current_object')
    for zoid, tid in curr_objs:
        cursor.execute('select state from object_state where zoid=%s and tid=%s',
                       (zoid, tid))
        state = cursor.fetchone()[0]
        if state is None:
            continue  # How would an object have a null state?
        for ref_zoid in referencesf(state):
            ref_zoid = u64(ref_zoid)
            cursor.execute('select tid from current_object where zoid=%s',
                           (ref_zoid,))
            if cursor.rowcount == 0:
                print "Bad reference found", ref_zoid
Beispiel #22
0
def main(path):
    """Print a per-oid report (size, reachable size, path, class) for
    the FileStorage at *path*."""
    fs = FileStorage(path, read_only=1)
    if PACK:
        fs.pack()

    db = ZODB.DB(fs)
    rt = db.open().root()
    # Map oid -> path string for objects within 3 hops of the root.
    paths = find_paths(rt, 3)

    def total_size(oid):
        # Bounded memo table for the recursive helper below.
        cache = {}
        cache_size = 1000
        def _total_size(oid, seen):
            # Memoized size of oid plus everything reachable from it.
            v = cache.get(oid)
            if v is not None:
                return v
            data, serialno = load_current(fs, oid)
            size = len(data)
            for suboid in referencesf(data):
                if suboid in seen:
                    continue
                seen[suboid] = 1
                size += _total_size(suboid, seen)
            cache[oid] = size
            if len(cache) == cache_size:
                cache.popitem()
            return size
        return _total_size(oid, {})

    # NOTE(review): Python-2 only -- .keys() with in-place sort and
    # paths.has_key do not exist on Python 3; confirm interpreter.
    keys = fs._index.keys()
    keys.sort()
    keys.reverse()

    if not VERBOSE:
        # If not running verbosely, don't print an entry for an object
        # unless it has an entry in paths.
        keys = filter(paths.has_key, keys)

    fmt = "%8s %5d %8d %s %s.%s"

    for oid in keys:
        data, serialno = load_current(fs, oid)
        mod, klass = get_pickle_metadata(data)
        refs = referencesf(data)
        path = paths.get(oid, '-')
        print(fmt % (U64(oid), len(data), total_size(oid), path, mod, klass))
Beispiel #23
0
def exportXML(jar, oid, file=None):
    """Export the object graph rooted at *oid* (loaded via connection
    *jar*) as a Zope XML export; returns the file object written.

    *file* may be None (a TemporaryFile is created), a filename string,
    or an open binary file.
    """
    # For performance reasons, exportXML does not use 'XMLrecord' anymore to map
    # oids. This requires to initialize MinimalMapping.marked_reference before
    # any string output, i.e. in ppml.Reference.__init__
    # This also fixed random failures when DemoStorage is used, because oids
    # can have values that have a shorter representation in 'repr' instead of
    # 'base64' (see ppml.convert) and ppml.String does not support this.
    load = jar._storage.load
    pickle_dict = {oid: None}  # oid -> cached reordered pickle (or None)
    max_cache = [1e7]  # do not cache more than 10MB of pickle data

    def getReorderedPickle(oid):
        # Load and reorder the pickle for oid, caching it while the
        # byte budget in max_cache lasts.
        p = pickle_dict[oid]
        if p is None:
            # Versions are ignored, but some 'load()' implementations require them
            # FIXME: remove "''" when TmpStore.load() on ZODB stops asking for it.
            p = load(oid, "")[0]
            p = reorderPickle(jar, p)[1]
            if len(p) < max_cache[0]:
                max_cache[0] -= len(p)
                pickle_dict[oid] = p
        return p

    # Sort records and initialize id_mapping
    # (reordered_oid_list grows while being iterated: this is a
    # breadth-first walk of the reference graph from the requested oid)
    id_mapping = ppml.MinimalMapping()
    reordered_oid_list = [oid]
    for oid in reordered_oid_list:
        _mapOid(id_mapping, oid)
        for oid in referencesf(getReorderedPickle(oid)):
            if oid not in pickle_dict:
                pickle_dict[oid] = None
                reordered_oid_list.append(oid)

    # Do real export
    if file is None:
        file = TemporaryFile()
    elif isinstance(file, basestring):  # NOTE(review): Python-2-only name
        file = open(file, "w+b")
    write = file.write
    write('<?xml version="1.0"?>\n<ZopeData>\n')
    for oid in reordered_oid_list:
        p = getReorderedPickle(oid)
        write(XMLrecord(oid, len(p), p, id_mapping))
    write("</ZopeData>\n")
    return file
Beispiel #24
0
def exportXML(jar, oid, file=None):
    """Write a Zope XML export of the graph rooted at *oid* (connection
    *jar*) and return the file object.

    *file*: None (TemporaryFile), a filename, or a binary file object.
    """
    # For performance reasons, exportXML does not use 'XMLrecord' anymore to map
    # oids. This requires to initialize MinimalMapping.marked_reference before
    # any string output, i.e. in ppml.Reference.__init__
    # This also fixed random failures when DemoStorage is used, because oids
    # can have values that have a shorter representation in 'repr' instead of
    # 'base64' (see ppml.convert) and ppml.String does not support this.
    load = jar._storage.load
    pickle_dict = {oid: None}  # oid -> cached reordered pickle (or None)
    max_cache = [1e7]  # do not cache more than 10MB of pickle data

    def getReorderedPickle(oid):
        # Load + reorder the pickle, caching while the byte budget lasts.
        p = pickle_dict[oid]
        if p is None:
            # Versions are ignored, but some 'load()' implementations require them
            # FIXME: remove "''" when TmpStore.load() on ZODB stops asking for it.
            p = load(oid, '')[0]
            p = reorderPickle(jar, p)[1]
            if len(p) < max_cache[0]:
                max_cache[0] -= len(p)
                pickle_dict[oid] = p
        return p

    # Sort records and initialize id_mapping
    # (the list grows while iterated -- a breadth-first reference walk)
    id_mapping = ppml.MinimalMapping()
    reordered_oid_list = [oid]
    for oid in reordered_oid_list:
        _mapOid(id_mapping, oid)
        for oid in referencesf(getReorderedPickle(oid)):
            if oid not in pickle_dict:
                pickle_dict[oid] = None
                reordered_oid_list.append(oid)

    # Do real export
    if file is None:
        file = TemporaryFile()
    elif isinstance(file, basestring):  # NOTE(review): Python-2-only name
        file = open(file, 'w+b')
    write = file.write
    write('<?xml version="1.0"?>\n<ZopeData>\n')
    for oid in reordered_oid_list:
        p = getReorderedPickle(oid)
        write(XMLrecord(oid, len(p), p, id_mapping))
    write('</ZopeData>\n')
    return file
    def _finish(self, tid, u, d, e):
        """Commit bookkeeping for transaction *tid*: store each touched
        object's pickle and serial, keep per-object outgoing-reference
        lists and global reference counts up to date, and garbage-collect
        objects whose refcount dropped to zero (except the root oid).

        *u*, *d*, *e* are unused here (presumably transaction user,
        description and extension -- confirm against the caller).
        """
        zeros = {}  # oids whose refcount fell to zero this transaction
        referenceCount = self._referenceCount
        referenceCount_get = referenceCount.get
        oreferences = self._oreferences
        serial = self._tid
        index = self._index
        opickle = self._opickle
        self._ltid = tid

        # iterate over all the objects touched by/created within this
        # transaction
        for entry in self._tmp:
            oid, data = entry[:]
            referencesl = []
            referencesf(data, referencesl)
            # Deduplicate the outgoing references into a dict-as-set.
            references = {}
            for roid in referencesl:
                references[roid] = 1

            # Create a reference count for this object if one
            # doesn't already exist
            if referenceCount_get(oid) is None:
                referenceCount[oid] = 0

            # update references that are already associated with this
            # object
            roids = oreferences.get(oid, [])
            for roid in roids:
                if roid in references:
                    # still referenced, so no need to update
                    # remove it from the references dict so it doesn't
                    # get "added" in the next clause
                    del references[roid]
                else:
                    # Delete the stored ref, since we no longer
                    # have it
                    oreferences[oid].remove(roid)
                    # decrement refcnt:
                    rc = referenceCount_get(roid, 1)
                    rc = rc - 1
                    if rc < 0:
                        # This should never happen
                        raise ReferenceCountError(
                            "%s (Oid %r had refcount %s)" %
                            (ReferenceCountError.__doc__, roid, rc))
                    referenceCount[roid] = rc
                    if rc == 0:
                        zeros[roid] = 1

            # Create a reference list for this object if one
            # doesn't already exist
            if oreferences.get(oid) is None:
                oreferences[oid] = []

            # Now add any references that weren't already stored
            for roid in references.keys():
                oreferences[oid].append(roid)
                # Create/update refcnt
                rc = referenceCount_get(roid, 0)
                if rc == 0 and zeros.get(roid) is not None:
                    # no longer zero: cancel the pending collection
                    del zeros[roid]
                referenceCount[roid] = rc + 1

            index[oid] = serial
            opickle[oid] = data
            now = time.time()
            self._conflict_cache[(oid, serial)] = data, now

        if zeros:
            for oid in zeros.keys():
                # the root oid is never garbage-collected
                if oid == '\0\0\0\0\0\0\0\0':
                    continue
                self._takeOutGarbage(oid)

        self._tmp = []
Beispiel #26
0
    def _finish(self, tid, u, d, e):
        """Commit bookkeeping for transaction *tid* (Python-2 variant):
        store pickles/serials for each touched object, maintain outgoing
        reference lists and reference counts, and garbage-collect
        objects whose refcount reached zero (the root oid is exempt).

        *u*, *d*, *e* are unused here (presumably user, description,
        extension -- confirm against the caller).
        """
        zeros={}  # oids whose refcount fell to zero this transaction
        referenceCount=self._referenceCount
        referenceCount_get=referenceCount.get
        oreferences=self._oreferences
        serial=self._tid
        index=self._index
        opickle=self._opickle
        self._ltid = tid

        # iterate over all the objects touched by/created within this
        # transaction
        for entry in self._tmp:
            oid, data = entry[:]
            referencesl=[]
            referencesf(data, referencesl)
            # deduplicate outgoing references into a dict-as-set
            references={}
            for roid in referencesl:
                references[roid]=1
            referenced=references.has_key

            # Create a reference count for this object if one
            # doesn't already exist
            if referenceCount_get(oid) is None:
                referenceCount[oid] = 0
                #zeros[oid]=1

            # update references that are already associated with this
            # object
            roids = oreferences.get(oid, [])
            for roid in roids:
                if referenced(roid):
                    # still referenced, so no need to update
                    # remove it from the references dict so it doesn't
                    # get "added" in the next clause
                    del references[roid]
                else:
                    # Delete the stored ref, since we no longer
                    # have it
                    oreferences[oid].remove(roid)
                    # decrement refcnt:
                    rc = referenceCount_get(roid, 1)
                    rc=rc-1
                    if rc < 0:
                        # This should never happen
                        raise ReferenceCountError, (
                            "%s (Oid %s had refcount %s)" %
                            (ReferenceCountError.__doc__,`roid`,rc)
                            )
                    referenceCount[roid] = rc
                    if rc==0:
                        zeros[roid]=1

            # Create a reference list for this object if one
            # doesn't already exist
            if oreferences.get(oid) is None:
                oreferences[oid] = []

            # Now add any references that weren't already stored
            for roid in references.keys():
                oreferences[oid].append(roid)
                # Create/update refcnt
                rc=referenceCount_get(roid, 0)
                if rc==0 and zeros.get(roid) is not None:
                    # no longer zero: cancel the pending collection
                    del zeros[roid]
                referenceCount[roid] = rc+1

            index[oid] =  serial
            opickle[oid] = data
            now = time.time()
            self._conflict_cache[(oid, serial)] = data, now

        if zeros:
            for oid in zeros.keys():
                # the root oid is never garbage-collected
                if oid == '\0\0\0\0\0\0\0\0': continue
                self._takeOutGarbage(oid)

        self._tmp = []