Example #1
def catchEncryptFilter(self, lpath, node, isDir):
    if self.efilters is not None:
        # get the encryption information we need
        einfo = self.efilters.check(lpath, node, isDir)
        # unpack and name the fields for readability
        etag = einfo[0]
        plugid = einfo[1]
        plugopts = einfo[2]
        plugtag = '%s.%s' % (plugid, plugopts)
        plug = getPM().getPluginInstance(plugid, plugtag, (None, plugopts,))
    else:
        # this should rarely be used.. the caller will likely provide
        # the efilters object when calling this function, but it is here
        # in the event that they do not..
        etag = b''
        plug = getPM().getPluginInstance('crypt.null', '', (None, []))
        plugopts = (None, [])
    return (etag, plug, plugopts)
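A minimal usage sketch for the helper above, assuming it is defined at module level. Everything here is a hypothetical stand-in (the stub plugin manager, the stub efilters object, and the paths); only the check() return shape of (etag, plugid, plugopts) and the getPluginInstance() call pattern are taken from the example itself.

class NullPlug:
    # stand-in for a 'crypt.null' plugin instance (no transformation)
    def __init__(self, args):
        self.args = args

class StubPM:
    # stand-in plugin manager; the real one caches per (plugid, plugtag)
    def getPluginInstance(self, plugid, plugtag, args):
        return NullPlug(args)

def getPM():
    return StubPM()

class StubEFilters:
    # answers every path with the null plugin and no options
    def check(self, lpath, node, isDir):
        return (b'', 'crypt.null', [])

class Holder:
    # borrow the function above so it can run against the stubs
    catchEncryptFilter = catchEncryptFilter
    def __init__(self, efilters):
        self.efilters = efilters

etag, plug, plugopts = Holder(StubEFilters()).catchEncryptFilter(b'/a/b.txt', b'b.txt', False)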
Example #2
def Pull(rhost, rport, sac, lpath, rpath = None, ssl = True, eventfunc = dummy):
    if rpath is None:
        rpath = b'/'

    # the default metadata size
    metasize = 128

    sac = bytes(sac, 'utf8')
    c = Client2(rhost, rport, sac, metasize = metasize)
    c.Connect(essl = ssl)

    rpbsz = len(rpath)
    
    jobFileTime = []
    jobFileSize = []
    jobDownload = []
    
    '''
        Transform each node into its full remote path, then
        place it into the master node list for processing.
    '''
    def __eventDirEnum(pkg, result, vector):
        rpath = pkg[0]
        nodes = pkg[1]
        if result is None:
            return
        for node in result:
            # skip any other revision but current
            if node[3] is not None:
                continue
            # grab the metadata and determine the encoding/encryption tag, if any
            meta = node[2]
            if meta is not None:
                tag = meta[1:1+32].strip(b'\x00')
            else:
                tag = None
            # build the node's full remote path
            name = node[0]
            name = rpath + b'/' + name
            nodes.append((name, node[1], tag))
    def __eventFileTime(pkg, result, vector):
        jobFileTime.append((pkg, result))
    
    def __eventFileSize(pkg, result, vector):
        jobFileSize.append((pkg, result))
    
    def __eventFileRead(pkg, result, vector):
        _lpath = pkg[0]
        _off = pkg[1]
        _fo = pkg[2]
        _opcount = pkg[3]

        success = result[0]
        if success != 1:
            # for now, just surface the failure; callers that need to
            # tolerate read errors can catch this and skip the chunk
            raise Exception('Error On Read From Remote For [%s] At Offset [%x]' % (_lpath, _off))
        data = result[1]

        logger.debug('write:%s:%x' % (_lpath, _off))

        _fo.write(_off, data)

        _opcount[0] -= 1

        '''
            The [0] is the current operation count and [1]
            is the init flag. If the init flag is True then
            the system is still creating pending requests 
            and therefore we should not terminate based on
            an operation count of zero.
        '''
        if _opcount[0] < 1 and _opcount[1] is False:
            # we are finished
            _fo.finish()
        
    echo = { 'echo': False }
        
    def __eventEcho(pkg, result, vector):
        pkg['echo'] = True
    
    # first enumerate the remote directory
    _nodes = c.DirList(rpath, Client.IOMode.Block)
    
    nodes = []
    __eventDirEnum((rpath, nodes), _nodes, 0)
    
    sentEcho = False
    while echo['echo'] is False:
        c.handleOrSend()

        quecount = len(nodes) + len(jobFileTime) + len(jobFileSize) + len(jobDownload)
        if quecount == 0 and c.waitCount() == 0 and sentEcho is False:
            sentEcho = True
            c.Echo(Client.IOMode.Callback, (__eventEcho, echo))
        
        # iterate through files
        for x in range(0, min(100, len(nodes))):
            # might be faster to pop from the end of the list,
            # but this preserves the expected order of
            # operations..
            node = nodes.pop(0)

            _rpath = node[0]
            #_lpath = '%s/%s' % (lpath, node[0][rpbsz:].decode('utf8'))
            _lpath = lpath + b'/' + node[0][rpbsz:]
            # if directory issue enumerate call
            if node[1] == 1:
                pkg = (_rpath, nodes)
                c.DirList(_rpath, Client.IOMode.Callback, (__eventDirEnum, pkg))
                continue
            # if file issue time check
            pkg = (_rpath, _lpath, node[2])
            c.FileTime(_rpath, Client.IOMode.Callback, (__eventFileTime, pkg))
        
        # iterate time responses
        for job in jobFileTime:
            _rpath = job[0][0]
            _lpath = job[0][1]
            _etag = job[0][2]       # etag from meta-data
            _rmtime = job[1]
            if os.path.exists(_lpath):
                stat = os.stat(_lpath)
                _lsize = stat.st_size
                _lmtime = stat.st_mtime
            else:
                _lsize = -1
                _lmtime = 0
                # the local file does not exist yet; it will be created when the first chunk is written
            # if the remote is at least as new, get the file size
            # so we can truncate the local file
            if _rmtime >= _lmtime:
                logger.debug('date failed for %s with local:%s remote:%s' % (_lpath, _lmtime, _rmtime))
                pkg = (_rpath, _lpath, _lsize, _etag)
                c.FileSize(_rpath, Client.IOMode.Callback, (__eventFileSize, pkg))
        jobFileTime = []
        
        # iterate size responses
        for job in jobFileSize:
            _rpath = job[0][0]
            _lpath = job[0][1]
            _lsize = job[0][2]
            _etag = job[0][3]
            _rsize = job[1]
            # if size different truncate local file to match
            if _rsize[0] != 1:
                raise Exception('_rsize for %s failed' % _rpath)
            logger.debug('[size] %s lsize:%s rsize:%s' % (_lpath, _lsize, _rsize))
            _rsize = _rsize[1]
            if _lsize != _rsize:
                # truncate local file
                TruncateFile(_lpath, _rsize)
            # queue a download operation
            pkg = [_rpath, _lpath, _rsize, 0, _etag]
            jobDownload.append(pkg)
        jobFileSize = []
        
        # iterate download operations
        tr = []
        chunksize = 1024 * 1024 * 4     # default to 4MB chunk
        for job in jobDownload:
            _rpath = job[0]
            _lpath = job[1]
            _rsize = job[2]
            _curoff = job[3]
            _etag = job[4]

            '''
                We are going to download this file. We know the etag, which
                is used by modification plugins to alter the file, for
                encryption or compression for example. So we need to match
                the tag back to the plugin and its options, then create a
                write object so that, as data is written, it is transformed
                back to its original form.
            '''
            if len(job) < 6:
                # the tag may be absent when the remote node had no metadata
                _etag = _etag.decode('utf8', 'ignore') if _etag is not None else ''
                _, _plugid, _plugopts = eventfunc('DecryptByTag', _etag)

                if _ is None and len(_etag) > 0:
                    # we apparently have no entry for this tag, so alert
                    # the user or the calling code that there is a problem
                    # that needs to be addressed
                    raise Exception('Tag specified as "%s" but no plugin found.' % _etag)

                if _ is None:
                    # no tag; just use the null plugin
                    _plugid = 'crypt.null'
                    _plugopts = (None, [])

                plug = getPM().getPluginInstance(_plugid, _etag, (None, _plugopts,))
                _fo = plug.beginwrite(_lpath)
                job.append(_fo)
                _opcount = [0, True]
                job.append(_opcount)
            else:
                _fo = job[5]
                _opcount = job[6]

            # increment operation count
            _opcount[0] += 1

            # determine amount we can read and choose maximum
            _rem = _rsize - _curoff
            rsz = min(_rem, chunksize)
            pkg = (_lpath, _curoff, _fo, _opcount)
            # this should *not* read messages and execute callbacks, because
            # if it does then technically it could call the callback before
            # we have set the init flag to False meaning the file never gets
            # closed
            c.FileRead(_rpath, _curoff, rsz, Client.IOMode.Callback, (__eventFileRead, pkg))
            if _curoff + rsz >= _rsize:
                tr.append(job)
                logger.debug('finish:%s' % (_lpath))
                # set the init flag to False so the callback
                # code knows when the count reaches zero that
                # the file is done
                _opcount[1] = False
                continue
            job[3] = _curoff + rsz
        # remove completed jobs
        for t in tr:
            jobDownload.remove(t)
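Pull recovers the encryption tag from the node metadata with meta[1:1+32].strip(b'\x00'), and Push (the next example) writes the matching header: a version byte 0xAA at offset 0 and the tag NUL-padded to 32 bytes at offset 1. Below is a small sketch of that layout as a pack/unpack round trip; the helper names and the sample tag are illustrative only.

META_VERSION = b'\xAA'   # metadata type/version byte written at offset 0
TAG_FIELD_LEN = 32       # fixed tag field size shared by Push and Pull

def pack_meta_header(tag):
    # build the first 33 metadata bytes: version byte + NUL-padded tag
    if len(tag) > TAG_FIELD_LEN:
        raise Exception('The encryption tag "%s" is longer than 32 bytes!' % tag)
    return META_VERSION + tag.ljust(TAG_FIELD_LEN, b'\x00')

def unpack_meta_tag(meta):
    # recover the tag exactly the way __eventDirEnum does
    return meta[1:1 + TAG_FIELD_LEN].strip(b'\x00')

meta = pack_meta_header(b'sample.tag')
assert unpack_meta_tag(meta) == b'sample.tag'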
Example #3
def Push(rhost, rport, sac, lpath, rpath = None, ssl = True, eventfunc = dummy, chunksize = 1024 * 32):
    if rpath is None:
        rpath = b'/'

    metasize = 128

    sac = bytes(sac, 'utf8')
    c = Client2(rhost, rport, sac, metasize = metasize, eventfunc = eventfunc)
    c.Connect(essl = ssl)

    # produce remote and local paths
    lpbsz = len(lpath)


    # cache of encryption plugin instances, keyed by options
    eplugs = {}
    
    jobDirEnum = []              # directories to be enumerated
    jobPendingFiles = []         # files pending processing
    jobGetRemoteSize = []        # pending remote size replies
    jobGetModifiedDate = []      # pending modified date replies
    jobPatch = []                # patch jobs
    jobPatchQueryDelayed = []    # delayed patch hash requests
    jobUpload = []               # upload jobs

    jobPatchOperations = []

    def getJobCount():
        return len(jobDirEnum) + len(jobPendingFiles) + len(jobGetRemoteSize) + \
               len(jobGetModifiedDate) + len(jobPatch) + len(jobPatchQueryDelayed) + \
               len(jobUpload)

    jobDirEnum.append(lpath)

    maxque = 2

    echo = { 'echo': False }
    sentEcho = False

    '''
        These turn the async polling model into an async callback model,
        at least to some extent. We still poll, but we do not poll
        individual objects, which reduces polling time (CPU burn)..
    '''
    def __eventFileSize(pkg, result, vector):
        jobGetRemoteSize.append((pkg, result))
    def __eventFileTime(pkg, result, vector):
        jobGetModifiedDate.append((pkg, result)) 
    def __eventEcho(pkg, result, vector):
        logger.debug('ECHO')
        pkg['echo'] = True
    def __eventFileWrite(pkg, result, vector):
        if result == 0:
            raise Exception('WRITE FAILED')
    def __eventHashReply(pkg, result, vector):
        # hash local file now and compare
        _success = result[0]
        _rhash = result[1]
        _rpath = pkg[0]
        _lpath = pkg[1]
        _rsize = pkg[2]
        _lsize = pkg[3]
        _offset = pkg[4]
        _size = pkg[5]
        _shrstate = pkg[6]
        _fo = pkg[7]

        if _success == 0:
            raise Exception('Hash Error')

        #gc.collect()

        _data = _fo.read(_offset, _size)
        #_lhash = _fo.read(_offset, _size)
        #fd = open(_lpath, 'rb')
        #fd.seek(_offset)
        #_data = fd.read(_size)
        #fd.seek(_offset)
        #_lhash = fd.read(_size)
        #fd.close()

        # this returns a new bytes instance representing
        # the hash
        _lhash = c.HashKmc(_data, 128)

        # if protocol overhead has exceeded half the file's size (less what
        # we have already patched) then we are basically wasting bandwidth
        # and should immediately force all remaining operations to perform
        # a write, turning this into an upload
        if _shrstate.bytesProtoUsed > (_shrstate.firstSize * 0.5) - _shrstate.bytesPatched:
            _shrstate.bytesPatched += len(_data)
            c.FileWrite(_rpath, _offset, _data, Client.IOMode.Discard)
            _shrstate.dec()
            if _shrstate.opCount < 1 and _shrstate.init is False:
                ct = time.time()
                c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
                eventfunc('PatchFinish', _rpath, _lpath)
                _fo.finish()
            return

        # if the hashes are the same do nothing
        if _lhash != _rhash:
            eventfunc('HashBad', _rpath, _lpath, _offset, _size, _rhash, _lhash)
            # decide whether it is worth breaking this section down
            # further or whether we should just cut our losses and
            # force a patch

            # this section of the remote file is different, so we must
            # either patch it now or keep breaking it down in hopes of
            # isolating where it differs -- we could end up spending
            # more bandwidth on hashes than the patch itself would save
            #
            logger.debug('hash-bad:%s:%x:%x\n' % (_lpath, _offset, _size))
            # limit the minimum size of a hash
            if _size > 1024 * 128:
                # this operation has not been decremented yet and it is
                # about to spawn two new ones (while itself ending), so
                # the net effect is a single increment
                logger.debug('hash; [%s] plus one' % id(_shrstate))
                _nsz = int(_size / 2)
                if _size % 2 == 0:
                    _aoff = int(_offset + _nsz)
                    _alen = _nsz
                    _boff = _offset
                    _blen = _nsz
                else:
                    # odd number of bytes: the upper half absorbs the extra
                    # byte so the two halves tile the window exactly
                    _aoff = int(_offset + _nsz)
                    _alen = _nsz + 1
                    _boff = _offset
                    _blen = _nsz
                # start the queries
                logger.debug('patching-split:%s:%x:%x (a:%x:%x) (b:%x:%x)' % (_lpath, _offset, _size, _aoff, _alen, _boff, _blen))
                # either execute it or delay it
                subjob = [_rpath, _lpath, _rsize, _lsize, _aoff, _alen, _shrstate, _fo]
                jobPatchQueryDelayed.append((_rpath, _aoff, _alen, subjob))
                # represent this sub-job/job as sleeping
                _shrstate.incsleeping()

                subjob = [_rpath, _lpath, _rsize, _lsize, _boff, _blen, _shrstate, _fo]
                jobPatchQueryDelayed.append((_rpath, _boff, _blen, subjob))
                # represent this sub-job/job as sleeping
                _shrstate.incsleeping()
                # this one turned into two (so only increment once)
                _shrstate.inc()

                # just an estimate of how much we are about to use
                _shrstate.bytesProtoUsed += 128 + (8 * 2 + 32) * 2
                return

            # just upload this section..
            eventfunc('Write', _rpath, _lpath, _offset, _size)
            _shrstate.bytesPatched += _size

            logger.debug('patching-section:%s:%x:%x' % (_lpath, _offset, _size))
            c.FileWrite(_rpath, _offset, _data, Client.IOMode.Callback, (__eventFileWrite, None))
            _shrstate.dec()
            # exit here
            return
        else:
            # track how many bytes we saved!
            _shrstate.bytesSaved += _size
            eventfunc('HashGood', _rpath, _lpath, _offset, _size)
            logger.debug('patching-match:%s:%x:%x' % (_lpath, _offset, _size))
        # decrement since we just died and spawned no sub-hash jobs
        logger.debug('hash; [%s] minus one' % id(_shrstate))
        _shrstate.dec()
        if _shrstate.opCount < 1 and _shrstate.init is False:
            # set the time to the current time to keep the file from being
            # checked every time by way of hashing
            ct = time.time()
            c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
            eventfunc('PatchFinish', _rpath, _lpath, _size)
            _fo.finish()
    
    # statistics
    databytesout = 0
    stat_uptodate = 0
    stat_uploaded = 0
    stat_patched = 0
    stat_checked = 0

    patchrunning = [0]
    
    dd = time.time()
    
    # the soft limit for application level buffer size
    buflimit = 1024 * 1024 * 4

    lt = time.time()

    # keep going until we get the echo 
    while echo['echo'] is False:
        # read any messages
        c.handleOrSend()

        # update our throughput
        eventfunc('Cycle', c.getThroughput())

        ########################################################
        ##################### LIMITERS #########################
        ########################################################
        #
        # This section sets and adjusts the limits that mainly affect
        # memory consumption, and that also help finish existing work
        # before starting new work. Everything is placed in one central
        # location to make it easier to adjust, because you can look at
        # everything as a whole.
        #
        #    JOB-DIR-ENUM
        if len(jobPendingFiles) < 1000:
            # only produce more pending files when low
            jobDirEnumLimit = min(500, len(jobDirEnum))
        else:
            jobDirEnumLimit = 0
        #    JOB-PENDING-FILES
        # do not process any new files unless the wait count is low and
        # there are fewer than 4 upload operations.. otherwise we start
        # spreading things out and nothing really gets completely done
        if c.waitCount() < 500 and len(jobUpload) < 4:
            # only throw files into pipeline when wait count is low
            jobPendingFilesLimit = min(500, len(jobPendingFiles))
        else:
            jobPendingFilesLimit = 0
        #    JOB-PATCH-QUERY-DELAYED
        # always process delayed patch queries
        jobPatchQueryDelayedLimit = min(2, len(jobPatchQueryDelayed))
        #    JOB-PATCH
        # always process patch jobs
        jobPatchLimit = min(2, len(jobPatch))
        #########################################################
        
        # if our output buffer is empty, we have no jobs, and we are not 
        # waiting on any requests
        if c.getBytesToSend() < 1 and getJobCount() < 1 and c.waitCount() < 1 and sentEcho is False:
            sentEcho = True
            c.Echo(Client.IOMode.Callback, (__eventEcho, echo))
        
        # if we let this continue to grow it could eventually consume all
        # physical memory, so we cap it; once it reaches that soft limit
        # we drain the buffer down to a safer level
        boutbuf = c.getBytesToSend()
        if boutbuf > buflimit:
            logger.debug('emptying outbound buffer..')
            # we do not force the buffer to drain completely, in hopes
            # that keeping some data queued makes it easier to keep the
            # server working - time the server spends idle is lost work
            # time
            while c.getBytesToSend() > 1024 * 1024:
                #              STALL/NET-DEADLOCK PREVENTION
                # this will still fire callbacks and store async results
                # while also sending data from our application level
                # buffers, which is desired - if we only sent pending
                # data and never read, the server's outgoing buffer would
                # fill up because our incoming buffer was full, creating
                # a data lock: we could never send from our buffer because
                # the remote side could not read because it cannot write
                c.handleOrSend()

                eventfunc('DumpCycle', c.getThroughput(), c.getBytesToSend())
                time.sleep(0.01)
        else:
            # just send what we can right now
            c.send()

        #
        # JOB-DIR-ENUM
        #
        # DESCRIPTION:
        #       this fills the pending files list; we limit it because if
        #       we do not it could overwhelm physical memory on the machine
        #       so we only produce more when there is room to do so
        x = -1
        for x in range(0, jobDirEnumLimit):
            dej = jobDirEnum[x] 
            try:
                nodes = os.listdir(dej)
            except OSError:
                # unreadable or vanished directory; skip it
                continue

            for node in nodes:
                _lpath = b'/'.join((dej, node))
                if os.path.isdir(_lpath):
                    # delay this..
                    res = eventfunc('Filter', _lpath, node, True)
                    if res or res is None:
                        jobDirEnum.append(_lpath)
                    continue
                res = eventfunc('Filter', _lpath, node, False)
                if res or res is None:
                    jobPendingFiles.append(_lpath)
        # drop what we completed
        jobDirEnum = jobDirEnum[x + 1:]

        #
        # JOB-PATCH-QUERY-DELAYED
        #
        # DESCRIPTION:
        #       this happens first because, while we wish to work on
        #       multiple files at once, we also wish to finish in-progress
        #       operations; so we first service pending hash requests,
        #       which come from hash operations that were delayed because
        #       there were too many outstanding requests. our code path
        #       for hashing can overwhelm physical memory if not limited,
        #       and that is what this does - holding patch operations
        #       until the queue count is under the limit.
        x = -1
        keep = []
        for x in range(0, len(jobPatchQueryDelayed)):
            job = jobPatchQueryDelayed[x]
            _rpath = job[0]
            _off = job[1]
            _size = job[2]
            subjob = job[3]
            # track how many are active [sent to server] (versus sleeping)
            if (subjob[6].opCount - subjob[6].opSleeping) > jobPatchQueryDelayedLimit:
                # we have too many active requests so wait until they complete
                # and are then represented as sleeping..
                keep.append(job)
                continue
            v = c.FileHash(_rpath, _off, _size, Client.IOMode.Callback, (__eventHashReply, subjob))
            # decrement sleeping to represent one that has been sent to the server
            subjob[6].decsleeping()                
        jobPatchQueryDelayed = keep

        #
        # JOB-PATCH
        #
        # DESCRIPTION:
        #       this issues the new hash requests that start the patching
        #       process, but only while the queue is below a certain number
        #       of items; that keeps memory consumption limited, because
        #       these can get out of hand fast - a multi-GB or even TB
        #       file can create a million requests, which is a lot
        # LOCATION:
        #       this happens before any new files are placed into the
        #       queues so we can finish our patch jobs before moving on
        #       to the next file

        if patchrunning[0] > 10:
            _max = 0
        else:
            _max = min(jobPatchLimit, len(jobPatch))
        x = 0
        while x < _max:
            job = jobPatch[x]
            _rpath = job[0]
            _lpath = job[1]
            _rsize = job[2]
            _lsize = job[3]
            _curoff = job[4]
            # make sure to create the shared state container
            if len(job) < 6:
                shrstate = PatchJobSharedState()
                shrstate.firstOffset = _curoff
                shrstate.firstSize = _lsize
                shrstate.bytesProtoUsed = 0
                shrstate.bytesPatched = 0
                shrstate.bytesSaved = 0
                shrstate.opCount = 0
                shrstate.opSleeping = 0
                shrstate.startTime = time.time()
                shrstate.lpath = _lpath
                shrstate.rpath = _rpath
                shrstate.init = True
                shrstate.patchrunning = patchrunning
                job.append(shrstate)
                jobPatchOperations.append(shrstate)
                '''
                    This _fo object replaces the old code that used to open a file. The _fo
                    is short for file object. It provides the functionality of reading and
                    writing to a file except the implementation can do additional things.

                    See ./plugins/ and especially ./plugins/crypt for examples of the 
                    implementation of this.
                '''
                tag, plugid, plugopts = eventfunc('EncryptFilter', _lpath, _lpath[_lpath.rfind(b'/') + 1:], False)
                # resolve the plugin instance from its id (same lookup
                # pattern as the other call sites in this module)
                plug = getPM().getPluginInstance(plugid, '%s.%s' % (plugid, plugopts), (None, plugopts,))
                _fo = plug.beginread(_lpath)
                job.append(_fo)
                # make sure the correct metadata type/version (VERSION 1) byte is written
                c.FileWriteMeta(_rpath, 0, b'\xAA', Client.IOMode.Discard)
                # write in the encryption tag (for automated reversal using encryption filter file)
                btag = bytes(tag, 'utf8')
                if len(btag) > 32:
                    raise Exception('The encryption tag "%s" is longer than 32 bytes!' % tag)
                c.FileWriteMeta(_rpath, 1, btag.ljust(32, b'\x00'), Client.IOMode.Discard)
            else:
                # get existing shared state container
                shrstate = job[5]
                _fo = job[6]

            # hash 4MB chunks (server is threaded so it should be okay)
            csz = 1024 * 1024 * 4
            # get actual size due to what we have remaining of the file
            _tsz = min(csz, _lsize - _curoff)

            # increment number of operations ongoing
            shrstate.inc()

            # create job and request with callback
            subjob = [_rpath, _lpath, _rsize, _lsize, _curoff, _tsz, shrstate, _fo]
            c.FileHash(_rpath, _curoff, _tsz, Client.IOMode.Callback, (__eventHashReply, subjob))

            # determine and track if we are finished
            job[4] = job[4] + _tsz
            if job[4] >= _lsize:
                # do not increment x after removing the job; deleting
                # jobPatch[x] shifts the next job down into index x
                shrstate.init = False
                del jobPatch[x]
                _max = _max - 1
                continue

            # increment x since we did not remove anything
            x = x + 1

        #
        #  JOB-PENDING-FILES
        #
        # DESCRIPTION:
        #       This takes pending files created by enumeration of local
        #       directories and starts processing them by first issuing a
        #       file size request to determine whether the file exists and
        #       is the correct size.
        x = -1
        for x in range(0, jobPendingFilesLimit):
            _lpath = jobPendingFiles[x]
            ##### DEBUG (NO FILES GREATER THAN 200MB) #####
            try:
                stat = os.stat(_lpath)
            except OSError:
                # file vanished or is unreadable; skip it
                continue
            _lsize = stat.st_size
            if _lsize > 1024 * 1024 * 200:
                continue
            ###############################################

            '''
                We have to get the post-modification size of the local
                file, since that is what we will be comparing to the
                remote. For the NULL modification plugin it will be
                exactly the same; for others it will be different. The
                plugin instance is cached by the plugin manager (PM).
            '''
            tag, plug, plugopts = eventfunc('EncryptFilter', _lpath, _lpath[_lpath.rfind(b'/') + 1:], False)
            if tag is None:
                # if none specified then default to null
                plug = getPM().getPluginInstance('crypt.null', '', (c, []))

            if plug is None:
                raise Exception('Apparently, we are missing the plugin referenced by tag "%s".' % tag)

            _esize = plug.getencryptedsize(_lpath)
            _lsize = _esize

            _rpath = rpath + _lpath[lpbsz:]
            stat_checked = stat_checked + 1
            pkg = (_rpath, _lpath, _lsize, None, int(stat.st_mtime))
            eventfunc('Start', _rpath, _lpath)
            c.FileSize(_rpath, Client.IOMode.Callback, (__eventFileSize, pkg))
        # drop what we completed
        jobPendingFiles = jobPendingFiles[x + 1:]

        ########################################################
        ############## NO LIMIT SECTIONS BELOW #################
        ########################################################

        #
        # JOB-GET-REMOTE-SIZE
        #
        for rsj in jobGetRemoteSize:
            pkg = rsj[0]
            _result = rsj[1]
            _rpath = pkg[0]
            _lpath = pkg[1]
            _lsize = pkg[2]
            _vector = pkg[3]
            _lmtime = pkg[4]

            eventfunc('SizeReply', _rpath, _lpath)

            # result[0] is the success code (non-zero on success) and result[1] is the size (0 on failure)
            _rsize = _result[1]
            if _lsize == _rsize and _result[0] == 1:
                # need to check modified date
                pkg = (_rpath, _lpath, _rsize, _lsize, _vector, _lmtime)
                c.FileTime(_rpath, Client.IOMode.Callback, (__eventFileTime, pkg))
            else:
                # first make the remote size match the local size
                c.FileTrun(_rpath, _lsize, Client.IOMode.Discard)
                if max(_rsize, _lsize) < 1:
                    eventfunc('Finished', _rpath, _lpath)
                    continue
                # need to decide if we want to upload or patch
                if min(_rsize, _lsize) / max(_rsize, _lsize) < 0.5:
                    # make upload job
                    jobUpload.append([_rpath, _lpath, _rsize, _lsize, 0])
                else:
                    # make patch job
                    jobPatch.append([_rpath, _lpath, _rsize, _lsize, 0])
        jobGetRemoteSize = []

        #
        # JOB-GET-MODIFIED-DATE
        #
        for rtj in jobGetModifiedDate:
            pkg = rtj[0]
            _rmtime = rtj[1]
            _rpath = pkg[0]
            _lpath = pkg[1]
            _rsize = pkg[2]
            _lsize = pkg[3]
            _vector = pkg[4]
            _lmtime = pkg[5]

            eventfunc('DateReply', _rpath, _lpath)

            if _rmtime < _lmtime:
                # need to decide if we want to upload or patch
                if min(_rsize, _lsize) / (max(_rsize, _lsize) + 1) < 0.5:
                    # make upload job
                    jobUpload.append([_rpath, _lpath, _rsize, _lsize, 0])
                else:
                    # make patch job
                    jobPatch.append([_rpath, _lpath, _rsize, _lsize, 0])
            else:
                # just drop it since its either up to date or newer
                eventfunc('Finished', _rpath, _lpath)
                stat_uptodate = stat_uptodate + 1
                continue
        jobGetModifiedDate = [] 

        # 
        # JOB-UPLOAD
        #
        # Limit ourselves to only 4 at a time.
        tr = []
        cjc = 0
        for x in range(0, min(4, len(jobUpload))):
            uj = jobUpload[x]
            _rpath = uj[0]
            _lpath = uj[1]
            _rsize = uj[2]
            _lsize = uj[3]
            _curoff = uj[4]

            '''
                Here we have to determine what modification plugin to use
                and then get an instance of it, and use that instance to
                do read operations instead of directly on the file.
            '''
            if len(uj) < 6:
                tag, plug, plugopts = eventfunc('EncryptFilter', _lpath, _lpath[_lpath.rfind(b'/') + 1:], False)  
                if plug is None:
                    # if none specified then default to null
                    plug = getPM().getPluginInstance('crypt.null', '', (c, []))
                    tag = ''
                try:
                    _fo = plug.beginread(_lpath)
                except PermissionError:
                    eventfunc('OpenFailed', _rpath, _lpath)
                    tr.append(uj)
                    continue

                uj.append(_fo)
                # make sure the correct metadata type/version (VERSION 1) byte is written
                c.FileWriteMeta(_rpath, 0, b'\xAA', Client.IOMode.Discard)
                # write in the encryption tag (for automated reversal using encryption filter file)
                btag = bytes(tag, 'utf8')
                if len(btag) > 32:
                    raise Exception('The encryption tag "%s" is longer than 32 bytes!' % tag)
                c.FileWriteMeta(_rpath, 1, btag.ljust(32, b'\x00'), Client.IOMode.Discard)
            else:
                _fo = uj[5]
            '''
                At this point we have the modification plugin instance's
                read object, and we treat it like a normal file.
            '''
            _chunksize = chunksize
            # see what we can send
            _rem = min(_lsize - _curoff, _chunksize)
            # read the chunk through the plugin read object
            _data = _fo.read(_curoff, _rem)

            # if no more data then stop and terminate this job
            if not _data:
                tr.append(uj)
                continue

            eventfunc('Write', _rpath, _lpath, _curoff, _chunksize)

            c.FileWrite(_rpath, _curoff, _data, Client.IOMode.Discard)
            # advance our current offset
            uj[4] = _curoff + len(_data)

            # if we reached the EOF then drop it (the empty-read check
            # above ends the job the same way)
            if uj[4] >= _lsize:
                tr.append(uj)
            
            # keep accounting for the bytes sent for statistics
            databytesout = databytesout + len(_data)

            # don't overfill our buffers, just exit
            if c.getBytesToSend() > buflimit:
                break

            # keep track of the number of jobs processed (appears to be legacy code)
            cjc = cjc + 1

        # remove finished jobs
        ct = time.time()
        for uj in tr:
            eventfunc('Finish', uj[0], uj[1])
            # set the modified time on the file to the current
            # time to represent it is up to date
            _rpath = uj[0]
            c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
            jobUpload.remove(uj)
            stat_uploaded = stat_uploaded + 1
    c.close()
    # we are done!
    return
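The heart of __eventHashReply above is a binary hash subdivision: a mismatched window is split in half (the odd byte going to one half) until it either matches, is small enough (128KB) to just write, or the accumulated protocol overhead makes a plain upload cheaper. Below is a self-contained sketch of that subdivision over two in-memory buffers, using SHA-256 in place of the server-side HashKmc; all names here are illustrative, not part of the module above.

import hashlib

MIN_CHUNK = 1024 * 128  # same 128KB floor as __eventHashReply

def diff_regions(local, remote, offset, size, out):
    # collect (offset, size) windows where local differs from remote,
    # splitting mismatched windows in half down to MIN_CHUNK
    lh = hashlib.sha256(local[offset:offset + size]).digest()
    rh = hashlib.sha256(remote[offset:offset + size]).digest()
    if lh == rh:
        return                      # hashes match; nothing to patch here
    if size <= MIN_CHUNK:
        out.append((offset, size))  # small enough; just write it
        return
    half = size // 2
    # the upper half absorbs the odd byte so the halves tile exactly
    diff_regions(local, remote, offset, half, out)
    diff_regions(local, remote, offset + half, size - half, out)

a = bytearray(b'x' * (1024 * 1024))
b = bytearray(a)
b[300000] = 0x41                    # corrupt a single byte
regions = []
diff_regions(bytes(a), bytes(b), 0, len(a), regions)
# regions now holds the <=128KB windows that would be patched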
Exemple #5
0
def Pull(rhost, rport, sac, lpath, rpath=None, ssl=True, eventfunc=dummy):
    if rpath is None:
        rpath = b'/'

    # the default metadata size
    metasize = 128

    sac = bytes(sac, 'utf8')
    c = Client2(rhost, rport, sac, metasize=metasize)
    c.Connect(essl=ssl)

    rpbsz = len(rpath)

    jobFileTime = []
    jobFileSize = []
    jobDownload = []
    '''
        Transform each node into it's full remote path, then
        place it into the master node list for processing.
    '''
    def __eventDirEnum(pkg, result, vector):
        rpath = pkg[0]
        nodes = pkg[1]
        if result is None:
            return
        for node in result:
            # skip any other revision but current
            if node[3] is not None:
                continue
            # grab the meta data and determine the encoding/encryption tag if any
            meta = node[2]
            if meta is not None:
                tag = meta[1:1 + 32].strip(b'\x00')
            else:
                tag = None
            # check that its not a special revision or system folder
            name = node[0]
            name = rpath + b'/' + name
            nodes.append((name, node[1], tag))

    def __eventFileTime(pkg, result, vector):
        jobFileTime.append((pkg, result))

    def __eventFileSize(pkg, result, vector):
        jobFileSize.append((pkg, result))

    def __eventFileRead(pkg, result, vector):
        success = result[0]
        if success != 1:
            # for now.. just let someone know shit screwed up.. if they
            # really need it to work they can come edit the code and skip
            # this and continue onward..
            raise Exception(
                'Error On Read From Remote For [%s] At Offset [%x]' %
                (_lpath, _off))
            return
        data = result[1]
        _lpath = pkg[0]
        _off = pkg[1]
        _fo = pkg[2]
        _opcount = pkg[3]

        logger.debug('write:%s:%x' % (_lpath, _off))

        _fo.write(_off, data)

        _opcount[0] -= 1
        '''
            The [0] is the current operation count and [1]
            is the init flag. If the init flag is True then
            the system is still creating pending requests 
            and therefore we should not terminate based on
            an operation count of zero.
        '''
        if _opcount[0] < 1 and _opcount[1] is False:
            # we are finished
            _fo.finish()

    echo = {'echo': False}

    def __eventEcho(pkg, result, vector):
        pkg['echo'] = True

    # first enumerate the remote directory
    _nodes = c.DirList(rpath, Client.IOMode.Block)

    nodes = []
    __eventDirEnum((rpath, nodes), _nodes, 0)

    sentEcho = False
    while echo['echo'] is False:
        c.handleOrSend()

        quecount = len(nodes) + len(jobFileTime) + len(jobFileSize) + len(
            jobDownload)
        if quecount == 0 and c.waitCount() == 0 and sentEcho is False:
            sentEcho = True
            c.Echo(Client.IOMode.Callback, (__eventEcho, echo))

        # iterate through files
        for x in range(0, min(100, len(nodes))):
            # might be faste to pop from end of list
            # but this will ensure more expected order
            # of operations..
            node = nodes.pop(0)

            _rpath = node[0]
            #_lpath = '%s/%s' % (lpath, node[0][rpbsz:].decode('utf8'))
            _lpath = lpath + b'/' + node[0][rpbsz:]
            # if directory issue enumerate call
            if node[1] == 1:
                pkg = (_rpath, nodes)
                c.DirList(_rpath, Client.IOMode.Callback,
                          (__eventDirEnum, pkg))
                continue
            # if file issue time check
            pkg = (_rpath, _lpath, node[2])
            c.FileTime(_rpath, Client.IOMode.Callback, (__eventFileTime, pkg))

        # iterate time responses
        for job in jobFileTime:
            _rpath = job[0][0]
            _lpath = job[0][1]
            _etag = job[0][2]  # etag from meta-data
            _rmtime = job[1]
            if os.path.exists(_lpath):
                stat = os.stat(_lpath)
                _lsize = stat.st_size
                _lmtime = stat.st_mtime
            else:
                _lsize = -1
                _lmtime = 0
                # create the local file (0 sized)
            # if newer then get file size so we can
            # truncate the local file
            if _rmtime >= _lmtime:
                logger.debug('date failed for %s with local:%s remote:%s' %
                             (_lpath, _lmtime, _rmtime))
                pkg = (_rpath, _lpath, _lsize, _etag)
                c.FileSize(_rpath, Client.IOMode.Callback,
                           (__eventFileSize, pkg))
        jobFileTime = []

        # iterate size responses
        for job in jobFileSize:
            _rpath = job[0][0]
            _lpath = job[0][1]
            _lsize = job[0][2]
            _etag = job[0][3]
            _rsize = job[1]
            # if size different truncate local file to match
            if _rsize[0] != 1:
                raise Exception('_rsize for %s failed' % _rpath)
            logger.debug('[size] %s lsize:%s rsize:%s' %
                         (_lpath, _lsize, _rsize))
            _rsize = _rsize[1]
            if _lsize != _rsize:
                # truncate local file
                TruncateFile(_lpath, _rsize)
            # queue a download operation
            pkg = [_rpath, _lpath, _rsize, 0, _etag]
            jobDownload.append(pkg)
        jobFileSize = []

        # iterate download operations
        tr = []
        chunksize = 1024 * 1024 * 4  # default to 4MB chunk
        for job in jobDownload:
            _rpath = job[0]
            _lpath = job[1]
            _rsize = job[2]
            _curoff = job[3]
            _etag = job[4]
            '''
                We are going to download this file. We know the etag which is
                used by modification plugins to alter the file for encryption
                or compression for example. So we need to try to match the tag
                back with the plugin and the options for it. Then create a 
                write object so as it is written it is unmodified back to it's
                original form.
            '''
            if len(job) < 6:
                _etag = _etag.decode('utf8', 'ignore')
                _, _plugid, _plugopts = eventfunc('DecryptByTag', _etag)

                if _ is None and _etag is not None and len(_etag) > 0:
                    # well, we apparently have no entry for this file so we
                    # need to alert the user or the calling code that there
                    # is a problem that needs to be addressed
                    raise Exception(
                        'Tag specified as "%s" but no plugin found.' % _etag)

                if _ is None:
                    # just use the null plugin
                    _ = None
                    _plugid = 'crypt.null'
                    _plugopts = (None, [])

                plug = getPM().getPluginInstance(_plugid, _etag, (
                    None,
                    _plugopts,
                ))
                _fo = plug.beginwrite(_lpath)
                job.append(_fo)
                _opcount = [0, True]
                job.append(_opcount)
            else:
                _fo = job[5]
                _opcount = job[6]

            # increment operation count
            _opcount[0] += 1

            # determine amount we can read and choose maximum
            _rem = _rsize - _curoff
            rsz = min(_rem, chunksize)
            pkg = (_lpath, _curoff, _fo, _opcount)
            # this should *not* read messages and execute callbacks, because
            # if it does then technically it could call the callback before
            # we have set the init flag to False meaning the file never gets
            # closed
            c.FileRead(_rpath, _curoff, rsz, Client.IOMode.Callback,
                       (__eventFileRead, pkg))
            if _curoff + rsz >= _rsize:
                tr.append(job)
                logger.debug('finish:%s' % (_lpath))
                # set the init flag to False so the callback
                # code knows when the count reaches zero that
                # the file is done
                _opcount[1] = False
                continue
            job[3] = _curoff + rsz
        # remove completed jobs
        for t in tr:
            jobDownload.remove(t)
Exemple #6
0
def Push(rhost,
         rport,
         sac,
         lpath,
         rpath=None,
         ssl=True,
         eventfunc=dummy,
         chunksize=1024 * 32):
    if rpath is None:
        rpath = b'/'

    metasize = 128

    sac = bytes(sac, 'utf8')
    c = Client2(rhost, rport, sac, metasize=metasize, eventfunc=eventfunc)
    c.Connect(essl=ssl)

    # produce remote and local paths
    lpbsz = len(lpath)

    #logger.debug('test')
    #print('testing')
    #c.FileTrun(b'/grape', 5, mode = Client.IOMode.Block)
    #c.FileWrite(b'/grape', 0, b'hello', mode = Client.IOMode.Block)
    #print('read', c.FileRead(b'/grape', 0, 5, mode = Client.IOMode.Block))
    #print('hash', c.FileHash(b'/grape', 0, 5, mode = Client.IOMode.Block))
    #_lhash = b'hello'
    #_lhash = c.HashKmc(_lhash, 128)
    #print('local-hash', _lhash)
    #sys.stdout.flush()
    #exit()

    # encryption plugin instances of options
    eplugs = {}

    jobDirEnum = []  # to be enumerated
    jobPendingFiles = []  # files pending processing
    jobGetRemoteSize = []  #
    jobGetModifiedDate = []  #
    jobPatch = []  # patch jobs
    jobPatchQueryDelayed = []  # delayed patch hash requests
    jobUpload = []  # upload jobs

    jobPatchOperations = []

    def getJobCount():
        return len(jobDirEnum) + len(jobPendingFiles) + len(jobGetRemoteSize) + \
               len(jobGetModifiedDate) + len(jobPatch) + len(jobPatchQueryDelayed) + \
               len(jobUpload)

    jobDirEnum.append(lpath)

    maxque = 2

    echo = {'echo': False}
    sentEcho = False
    '''
        These turn the async polling model into a async callback model, at
        least to some extent. We still poll but we do not poll individual
        objects which reduces polling time (CPU burn)..
    '''

    def __eventFileSize(pkg, result, vector):
        jobGetRemoteSize.append((pkg, result))

    def __eventFileTime(pkg, result, vector):
        jobGetModifiedDate.append((pkg, result))

    def __eventEcho(pkg, result, vector):
        logger.debug('ECHO')
        pkg['echo'] = True

    def __eventFileWrite(pkg, result, vector):
        if result == 0:
            raise Exception('WRITE FAILED')

    def __eventHashReply(pkg, result, vector):
        # hash local file now and compare
        _success = result[0]
        _rhash = result[1]
        _rpath = pkg[0]
        _lpath = pkg[1]
        _rsize = pkg[2]
        _lsize = pkg[3]
        _offset = pkg[4]
        _size = pkg[5]
        _shrstate = pkg[6]
        _fo = pkg[7]

        if _success == 0:
            raise Exception('Hash Error')

        #gc.collect()

        _data = _fo.read(_offset, _size)
        #_lhash = _fo.read(_offset, _size)
        #fd = open(_lpath, 'rb')
        #fd.seek(_offset)
        #_data = fd.read(_size)
        #fd.seek(_offset)
        #_lhash = fd.read(_size)
        #fd.close()

        # this returns a new bytes instance representing
        # the hash
        _lhash = c.HashKmc(_data, 128)

        # if we have used more bytes than the file's actual size then we are
        # basically just wasting bandwidth and should immediantly force all
        # remaining operations to perform a write to turn this into an upload
        if _shrstate.bytesProtoUsed > (_shrstate.firstSize *
                                       0.5) - _shrstate.bytesPatched:
            _shrstate.bytesPatched += len(_data)
            c.FileWrite(_rpath, _offset, _data, Client.IOMode.Discard)
            _shrstate.dec()
            if _shrstate.opCount < 1 and _shrstate.init is False:
                ct = time.time()
                c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
                eventfunc('PatchFinish', _rpath, _lpath)
                _fo.finish()
            return

        # if the hashes are the same do nothing
        if _lhash != _rhash:
            eventfunc('HashBad', _rpath, _lpath, _offset, _size, _rhash,
                      _lhash)
            # let us decide if it is worth breaking down futher
            # or we should just cut our loses and force a patch

            # this section of the remote file is different so,
            # now we must decide to either patch this section
            # or continue breaking it down further in hopes that
            # we find where it differs -- we could end up spending
            # more bandwidth on
            #
            logger.debug('hash-bad:%s:%x:%x\n' % (_lpath, _offset, _size))
            # limit the minimum size of a hash
            if _size > 1024 * 128:
                # we have not decremented for this operation and we are
                # going to create two new (but we are going to die) so
                # only increment it by one
                logger.debug('hash; [%s] plus one' % id(_shrstate))
                _nsz = int(_size / 2)
                if _size % 2 == 0:
                    _aoff = int(_offset + _nsz)
                    _alen = _nsz
                    _boff = _offset
                    _blen = _nsz
                else:
                    _aoff = int(_offset + _nsz)
                    _alen = _nsz
                    _boff = _offset
                    # adjust by one since we had odd number of bytes
                    _blen = _nsz + 1
                # start the queries
                logger.debug(
                    'patching-split:%s:%x:%x (a:%x:%x) (b:%x:%x)' %
                    (_lpath, _offset, _size, _aoff, _alen, _boff, _blen))
                # either execute it or delay it
                subjob = [
                    _rpath, _lpath, _rsize, _lsize, _aoff, _alen, _shrstate,
                    _fo
                ]
                jobPatchQueryDelayed.append((_rpath, _aoff, _alen, subjob))
                # represent this sub-job/job as sleeping
                _shrstate.incsleeping()

                subjob = [
                    _rpath, _lpath, _rsize, _lsize, _boff, _blen, _shrstate,
                    _fo
                ]
                jobPatchQueryDelayed.append((_rpath, _boff, _blen, subjob))
                # represent this sub-job/job as sleeping
                _shrstate.incsleeping()
                # this one turned into two (so only increment once)
                _shrstate.inc()

                # just an estimate of how much we are about to use
                _shrstate.bytesProtoUsed += 128 + (8 * 2 + 32) * 2
                return

            # just upload this section..
            eventfunc('Write', _rpath, _lpath, _offset, _size)
            _shrstate.bytesPatched += _size

            logger.debug('patching-section:%s:%x:%x' %
                         (_lpath, _offset, _size))
            c.FileWrite(_rpath, _offset, _data, Client.IOMode.Callback,
                        (__eventFileWrite, None))
            _shrstate.dec()
            # exit here
            return
        else:
            # track how many bytes we saved!
            _shrstate.bytesSaved += _size
            eventfunc('HashGood', _rpath, _lpath, _offset, _size)
            logger.debug('patching-match:%s:%x:%x' % (_lpath, _offset, _size))
        # decrement since we just died and spawned no sub-hash jobs
        logger.debug('hash; [%s] minus one' % id(_shrstate))
        _shrstate.dec()
        if _shrstate.opCount < 1 and _shrstate.init is False:
            # set the time to the current time to keep it from being check every
            # time by way of hashing
            ct = time.time()
            c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
            eventfunc('PatchFinish', _rpath, _lpath, _size)
            _fo.finish()

    # statistics
    databytesout = 0
    stat_uptodate = 0
    stat_uploaded = 0
    stat_patched = 0
    stat_checked = 0

    patchrunning = [0]

    dd = time.time()

    # the soft limit for application level buffer size
    buflimit = 1024 * 1024 * 4

    lt = time.time()

    # keep going until we get the echo
    while echo['echo'] is False:
        # read any messages
        c.handleOrSend()

        # update our throughput
        eventfunc('Cycle', c.getThroughput())

        ########################################################
        ##################### LIMITERS #########################
        ########################################################
        #
        # This section sets and adjusts the limits which directly
        # effect memory consumption mainly, and also help to finish
        # things before starting new things. I placed everything
        # into a central location to make it easier to adjust stuff
        # because you can look at everything as a whole.
        #
        #    JOB-DIR-ENUM
        if len(jobPendingFiles) < 1000:
            # only produce more pending files when low
            jobDirEnumLimit = min(500, len(jobDirEnum))
        else:
            jobDirEnumLimit = 0
        #    JOB-PENDING-FILES
        # do not process any new files unless we have no patch operations
        # and no more than 19 upload operations.. otherwise we start spreading
        # things out and nothing really gets completely done
        if c.waitCount() < 500 and len(jobUpload) < 4:
            # only throw files into pipeline when wait count is low
            jobPendingFilesLimit = min(500, len(jobPendingFiles))
        else:
            jobPendingFilesLimit = 0
        #    JOB-PATCH-QUERY-DELAYED
        # always process delayed patch queries
        jobPatchQueryDelayedLimit = min(2, len(jobPatchQueryDelayed))
        #    JOB-PATCH
        # always process patch jobs
        jobPatchLimit = min(2, len(jobPatch))
        #########################################################

        # if our output buffer is empty, we have no jobs, and we are not
        # waiting on any requests
        if c.getBytesToSend() < 1 and getJobCount() < 1 and c.waitCount(
        ) < 1 and sentEcho is False:
            sentEcho = True
            c.Echo(Client.IOMode.Callback, (__eventEcho, echo))

        # if we let this continue to grow it could eventually consume all
        # physical memory so we limit it and once it reaches that soft
        # limit it will empty our buffers completely
        boutbuf = c.getBytesToSend()
        if boutbuf > buflimit:
            logger.debug('emptying outbound buffer..')
            # i decided not to force the buffer to completely drain
            # in hopes that if we didnt it would be easier to keep
            # the server working - the more time the server is idle
            # is lost work time
            while c.getBytesToSend() > 1024 * 1024:
                #              STALL/NET-DEADLOCK PREVENTION
                # this will still cause callbacks to be fired and async results
                # to be stored at the same time it will send any data from our
                # application level buffers, which is desired - if we only sent
                # pending data and never read the server's outgoing buffer would
                # fill up because our incoming would fill up these creating a
                # data lock because we could never send from our buffer because
                # the remote side could not read because it can not write
                c.handleOrSend()

                eventfunc('DumpCycle', c.getThroughput(), c.getBytesToSend())
                time.sleep(0.01)
        else:
            # just send what we can right now
            c.send()

        #
        # JOB-DIR-ENUM
        #
        # DESCRIPTION:
        #       this fills the pending files list; it is limited
        #       because an unbounded list could overwhelm physical
        #       memory, so we only produce more when there is room
        x = -1
        for x in range(0, jobDirEnumLimit):
            dej = jobDirEnum[x]
            try:
                nodes = os.listdir(dej)
            except OSError:
                # unreadable or vanished directory; skip it
                continue

            for node in nodes:
                _lpath = b'/'.join((dej, node))
                if os.path.isdir(_lpath):
                    # delay this..
                    res = eventfunc('Filter', _lpath, node, True)
                    if res or res is None:
                        jobDirEnum.append(_lpath)
                    continue
                res = eventfunc('Filter', _lpath, node, False)
                if res or res is None:
                    jobPendingFiles.append(_lpath)
        # drop what we completed
        jobDirEnum = jobDirEnum[x + 1:]
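        # The 'Filter' event gives the caller per-entry control over
        # enumeration: returning False skips the entry, True or None
        # keeps it. A minimal sketch of a user-supplied eventfunc
        # handling it (hypothetical handler):
        #
        #   def myeventfunc(event, *args):
        #       if event == 'Filter':
        #           _path, name, isdir = args
        #           return not name.startswith(b'.')  # skip dotfiles
        #       return None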

        #
        # JOB-PATCH-QUERY-DELAYED
        #
        # DESCRIPTION:
        #       this happens first because while we wish to work on multiple
        #       files at once we also wish to finish in-progress operations;
        #       to do that we first service pending hash requests that were
        #       delayed because there were too many outstanding requests.
        #       the hashing code path can overwhelm physical memory if not
        #       limited, so patch operations are held here until the queue
        #       count is under the limit.
        x = -1
        keep = []
        for x in range(0, len(jobPatchQueryDelayed)):
            job = jobPatchQueryDelayed[x]
            _rpath = job[0]
            _off = job[1]
            _size = job[2]
            subjob = job[3]
            # track how many are active [sent to server] (versus sleeping)
            if (subjob[6].opCount -
                    subjob[6].opSleeping) > jobPatchQueryDelayedLimit:
                # we have too many active requests so wait until they complete
                # and are then represented as sleeping..
                keep.append(job)
                continue
            c.FileHash(_rpath, _off, _size, Client.IOMode.Callback,
                       (__eventHashReply, subjob))
            # decrement sleeping to represent one that has been sent to the server
            subjob[6].decsleeping()
        jobPatchQueryDelayed = keep
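        # NOTE: opCount - opSleeping is the number of hash requests
        # actually on the wire for a job; a delayed query is only
        # re-issued once that active count falls to the limit or
        # below, which bounds the memory held by outstanding replies.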

        #
        # JOB-PATCH
        #
        # DESCRIPTION:
        #       this issues new hash requests, which start the patching
        #       process, only while the queue is below a certain number
        #       of items; that keeps memory consumption limited, because
        #       requests can get out of hand fast - a multi-GB or even
        #       TB-sized file can create an enormous number of them
        # LOCATION:
        #       this happens before any new files are placed into the
        #       queues so we can finish our patch jobs before moving on
        #       to the next file

        if patchrunning[0] > 10:
            _max = 0
        else:
            _max = min(jobPatchLimit, len(jobPatch))
        x = 0
        while x < _max:
            job = jobPatch[x]
            _rpath = job[0]
            _lpath = job[1]
            _rsize = job[2]
            _lsize = job[3]
            _curoff = job[4]
            # make sure to create the shared state container
            if len(job) < 6:
                shrstate = PatchJobSharedState()
                shrstate.firstOffset = _curoff
                shrstate.firstSize = _lsize
                shrstate.bytesProtoUsed = 0
                shrstate.bytesPatched = 0
                shrstate.bytesSaved = 0
                shrstate.opCount = 0
                shrstate.opSleeping = 0
                shrstate.startTime = time.time()
                shrstate.lpath = _lpath
                shrstate.rpath = _rpath
                shrstate.init = True
                shrstate.patchrunning = patchrunning
                job.append(shrstate)
                jobPatchOperations.append(shrstate)
                '''
                    This _fo object replaces the old code that used to open a file. The _fo
                    is short for file object. It provides the functionality of reading and
                    writing to a file except the implementation can do additional things.

                    See ./plugins/ and especially ./plugins/crypt for examples of the 
                    implementation of this.
                '''
                tag, plug, plugopts = eventfunc(
                    'EncryptFilter', _lpath, _lpath[_lpath.rfind(b'/') + 1:],
                    False)
                _fo = plug.beginread(_lpath)
                job.append(_fo)
                # make sure the correct metadata type/version (VERSION 1) byte is written
                c.FileWriteMeta(_rpath, 0, b'\xAA', Client.IOMode.Discard)
                # write in the encryption tag (for automated reversal using encryption filter file)
                btag = bytes(tag, 'utf8')
                if len(btag) > 32:
                    raise Exception(
                        'The encryption tag "%s" is longer than 32 bytes!' %
                        tag)
                c.FileWriteMeta(_rpath, 1, btag.ljust(32, b'\x00'),
                                Client.IOMode.Discard)
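                # resulting metadata layout (as written here): byte 0
                # holds the version marker 0xAA and bytes 1..32 hold
                # the encryption tag, NUL-padded to exactly 32 bytes;
                # the pull side strips that padding to recover the tag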
            else:
                # get existing shared state container
                shrstate = job[5]
                _fo = job[6]

            # hash 4MB chunks (server is threaded so it should be okay)
            csz = 1024 * 1024 * 4
            # get actual size due to what we have remaining of the file
            _tsz = min(csz, _lsize - _curoff)
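            # e.g. with 10MB of the file remaining and 4MB chunks, the
            # job issues hash requests of 4MB, 4MB, and finally 2MB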

            # increment number of operations ongoing
            shrstate.inc()

            # create job and request with callback
            subjob = [
                _rpath, _lpath, _rsize, _lsize, _curoff, _tsz, shrstate, _fo
            ]
            c.FileHash(_rpath, _curoff, _tsz, Client.IOMode.Callback,
                       (__eventHashReply, subjob))

            # determine and track if we are finished
            job[4] = job[4] + _tsz
            if job[4] >= _lsize:
                # do not increment x after removing the entry; the
                # list shifts down so x already points at the next
                # job (a little hacky, but correct)
                shrstate.init = False
                del jobPatch[x]
                _max = _max - 1
                continue

            # increment x since we did not remove anything
            x = x + 1

        #
        #  JOB-PENDING-FILES
        #
        # DESCRIPTION:
        #       This takes pending files created by enumerating local
        #       directories and starts processing them by issuing a file
        #       size request to determine whether the file exists and is
        #       the correct size.
        x = -1
        for x in range(0, jobPendingFilesLimit):
            _lpath = jobPendingFiles[x]
            ##### DEBUG (NO FILES GREATER THAN 200MB) #####
            try:
                stat = os.stat(_lpath)
            except OSError:
                # file vanished or is unreadable; skip it
                continue
            _lsize = stat.st_size
            if _lsize > 1024 * 1024 * 200:
                continue
            ###############################################
            '''
                We have to get the modified size of the local file
                since we will be comparing this to the remote. For
                the NULL modification plugin it will be exactly the
                same, but for others it will be different. So we do
                that here. The plugin instance is cached by the plugin
                manager (PM).
            '''
            tag, plug, plugopts = eventfunc('EncryptFilter', _lpath,
                                            _lpath[_lpath.rfind(b'/') + 1:],
                                            False)
            if tag is None:
                # if none specified then default to null
                plug = getPM().getPluginInstance('crypt.null', '', (c, []))

            if plug is None:
                raise Exception(
                    'Apparently, we are missing the plugin for tag "%s".' %
                    tag)

            _esize = plug.getencryptedsize(_lpath)
            _lsize = _esize

            _rpath = rpath + _lpath[lpbsz:]
            stat_checked = stat_checked + 1
            pkg = (_rpath, _lpath, _lsize, None, int(stat.st_mtime))
            eventfunc('Start', _rpath, _lpath)
            c.FileSize(_rpath, Client.IOMode.Callback, (__eventFileSize, pkg))
        # drop what we completed
        jobPendingFiles = jobPendingFiles[x + 1:]
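        # The 'EncryptFilter' event is expected to return a
        # (tag, plugin-instance, plugin-options) tuple for the given
        # path. A rough sketch of a handler (hypothetical; 'filters'
        # stands for whatever filter object the caller maintains):
        #
        #   def myeventfunc(event, *args):
        #       if event == 'EncryptFilter':
        #           lpath, name, isdir = args
        #           return filters.catchEncryptFilter(lpath, name, isdir)
        #       return None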

        ########################################################
        ############## NO LIMIT SECTIONS BELOW #################
        ########################################################

        #
        # JOB-GET-REMOTE-SIZE
        #
        for rsj in jobGetRemoteSize:
            pkg = rsj[0]
            _result = rsj[1]
            _rpath = pkg[0]
            _lpath = pkg[1]
            _lsize = pkg[2]
            _vector = pkg[3]
            _lmtime = pkg[4]

            eventfunc('SizeReply', _rpath, _lpath)

            # _result[0] = success code (non-zero on success) and
            # _result[1] = size (0 on failure)
            _rsize = _result[1]
            if _lsize == _rsize and _result[0] == 1:
                # need to check modified date
                pkg = (_rpath, _lpath, _rsize, _lsize, _vector, _lmtime)
                c.FileTime(_rpath, Client.IOMode.Callback,
                           (__eventFileTime, pkg))
            else:
                # first make the remote size match the local size
                c.FileTrun(_rpath, _lsize, Client.IOMode.Discard)
                if max(_rsize, _lsize) < 1:
                    eventfunc('Finished', _rpath, _lpath)
                    continue
                # need to decide if we want to upload or patch
                if min(_rsize, _lsize) / max(_rsize, _lsize) < 0.5:
                    # make upload job
                    jobUpload.append([_rpath, _lpath, _rsize, _lsize, 0])
                else:
                    # make patch job
                    jobPatch.append([_rpath, _lpath, _rsize, _lsize, 0])
        jobGetRemoteSize = []
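        # e.g. a 10MB remote against a 100MB local gives 0.1 (< 0.5),
        # so the file is re-uploaded outright; a 90MB remote against a
        # 100MB local gives 0.9, so it is patched in place instead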

        #
        # JOB-GET-MODIFIED-DATE
        #
        for rtj in jobGetModifiedDate:
            pkg = rtj[0]
            _rmtime = rtj[1]
            _rpath = pkg[0]
            _lpath = pkg[1]
            _rsize = pkg[2]
            _lsize = pkg[3]
            _vector = pkg[4]
            _lmtime = pkg[5]

            eventfunc('DateReply', _rpath, _lpath)

            if _rmtime < _lmtime:
                # need to decide if we want to upload or patch
                if min(_rsize, _lsize) / (max(_rsize, _lsize) + 1) < 0.5:
                    # make upload job
                    jobUpload.append([_rpath, _lpath, _rsize, _lsize, 0])
                else:
                    # make patch job
                    jobPatch.append([_rpath, _lpath, _rsize, _lsize, 0])
            else:
                # just drop it since it's either up to date or newer
                eventfunc('Finished', _rpath, _lpath)
                stat_uptodate = stat_uptodate + 1
                continue
        jobGetModifiedDate = []

        #
        # JOB-UPLOAD
        #
        # Limit ourselves to only 4 at a time.
        tr = []
        cjc = 0
        for x in range(0, min(4, len(jobUpload))):
            uj = jobUpload[x]
            _rpath = uj[0]
            _lpath = uj[1]
            _rsize = uj[2]
            _lsize = uj[3]
            _curoff = uj[4]
            '''
                Here we have to determine what modification plugin to use
                and then get an instance of it, and use that instance to
                do read operations instead of directly on the file.
            '''
            if len(uj) < 6:
                tag, plug, plugopts = eventfunc(
                    'EncryptFilter', _lpath, _lpath[_lpath.rfind(b'/') + 1:],
                    False)
                if plug is None:
                    # if none specified then default to null
                    plug = getPM().getPluginInstance('crypt.null', '', (c, []))
                    tag = ''
                try:
                    _fo = plug.beginread(_lpath)
                except PermissionError:
                    eventfunc('OpenFailed', _rpath, _lpath)
                    tr.append(uj)
                    continue

                uj.append(_fo)
                # make sure the correct metadata type/version (VERSION 1) byte is written
                c.FileWriteMeta(_rpath, 0, b'\xAA', Client.IOMode.Discard)
                # write in the encryption tag (for automated reversal using encryption filter file)
                btag = bytes(tag, 'utf8')
                if len(btag) > 32:
                    raise Exception(
                        'The encryption tag "%s" is longer than 32 bytes!' %
                        tag)
                c.FileWriteMeta(_rpath, 1, btag.ljust(32, b'\x00'),
                                Client.IOMode.Discard)
            else:
                _fo = uj[5]
            '''
                At this point we have the modification plugin instance
                read object and we are treating it like a normal file.
            '''
            _chunksize = chunksize
            # see what we can send
            _rem = min(_lsize - _curoff, _chunksize)
            # read the chunk through the plugin file object rather than
            # directly from the local file (the plugin may transform the
            # data, e.g. encrypt it)

            _data = _fo.read(_curoff, _rem)
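            # For reference, a minimal null (pass-through) read object
            # compatible with this _fo.read(offset, length) usage could
            # look roughly like this (a sketch, not the actual
            # plugins/crypt implementation):
            #
            #   class NullReadObject:
            #       def __init__(self, lpath):
            #           self.fd = open(lpath, 'rb')
            #       def read(self, off, length):
            #           self.fd.seek(off)
            #           return self.fd.read(length)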

            # if no more data then stop and terminate this job
            if not _data:
                tr.append(uj)
                continue

            eventfunc('Write', _rpath, _lpath, _curoff, _chunksize)

            c.FileWrite(_rpath, _curoff, _data, Client.IOMode.Discard)
            # advance our current offset
            uj[4] = _curoff + len(_data)

            # if we reached the EOF then drop the job (the empty-read
            # check above also catches this)
            if uj[4] >= _lsize:
                tr.append(uj)

            # keep accounting for the bytes sent for statistics
            databytesout = databytesout + len(_data)

            # don't overfill our buffers; just stop uploading this cycle
            if c.getBytesToSend() > buflimit:
                break

            # keep track of the number of jobs processed (leftover
            # accounting; cjc is not read anywhere else)
            cjc = cjc + 1

        # remove finished jobs
        ct = time.time()
        for uj in tr:
            eventfunc('Finish', uj[0], uj[1])
            # set the modified time on the file to the current
            # time to represent it is up to date
            _rpath = uj[0]
            c.FileSetTime(_rpath, int(ct), int(ct), Client.IOMode.Discard)
            jobUpload.remove(uj)
            stat_uploaded = stat_uploaded + 1
        continue
    c.close()
    # we are done!
    return