Example #1
    def MapOverRange(futurekey, startbyte, endbyte, weight, **kwargs):
        logdebug("Enter MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

        linitialresult = initialresult if initialresult is not None else 0
        loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b
    
        try:
            # open file at gcspath for read
            with gcs.open(gcspath) as gcsfile:
                page, ranges = hwalk(gcsfile, pagesize, 2, startbyte, endbyte) 

            if pagemapf:
                lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult, loncombineresultsf)
                taskkwargs["futurename"] = "pagemap %s of %s,%s" % (len(page), startbyte, endbyte)
                future(pagemapf, parentkey=futurekey, onallchildsuccessf=lonallchildsuccessf, weight = len(page), **taskkwargs)(page)
            else:
                setlocalprogress(futurekey, len(page))

            if ranges:
                newweight = (weight - len(page)) / len(ranges) if weight is not None else None
                for arange in ranges:
                    taskkwargs["futurename"] = "shard %s" % (arange)

                    lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult if pagemapf else len(page), loncombineresultsf)

                    future(MapOverRange, parentkey=futurekey, onallchildsuccessf=lonallchildsuccessf, weight = newweight, **taskkwargs)(arange[0], arange[1], weight = newweight)
                
            if ranges or pagemapf:
                raise FutureReadyForResult("still going")
            else:
                return len(page)
        finally:
            logdebug("Leave MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))
Example #2
def ndbshardedpagemap(pagemapf=None, ndbquery=None, initialshards = 10, pagesize = 100, **taskkwargs):
    @task(**taskkwargs)
    def MapOverRange(keyrange, **kwargs):
        logdebug("Enter MapOverRange: %s" % keyrange)
 
        _fixkeyend(keyrange, kind)
 
        filteredquery = keyrange.filter_ndb_query(ndbquery)
         
        logdebug(filteredquery)
         
        keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)
         
        if pagemapf:
            pagemapf(keys)
                     
        if more and keys:
            newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction, False, keyrange.include_end)
            krlist = newkeyrange.split_range()
            logdebug("krlist: %s" % krlist)
            for kr in krlist:
                MapOverRange(kr)
        logdebug("Leave MapOverRange: %s" % keyrange)
 
    kind = ndbquery.kind
 
    krlist = KeyRange.compute_split_points(kind, initialshards)
    logdebug("first krlist: %s" % krlist)
 
    for kr in krlist:
        MapOverRange(kr)
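
Hypothetical usage of ndbshardedpagemap, assuming an ndb model MyModel and a page callback (both illustrative):

from google.appengine.ext import ndb

class MyModel(ndb.Model):
    name = ndb.StringProperty()

def count_keys(keys):
    logdebug("page of %s keys" % len(keys))

# walks the whole kind, 100 keys per page, fanning out across tasks
ndbshardedpagemap(pagemapf=count_keys, ndbquery=MyModel.query(),
                  initialshards=10, pagesize=100)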
Example #3
def OnProgressF(futurekey):
    futureobj = futurekey.get() if futurekey else None
    if futureobj and futureobj.parentkey:
        taskkwargs = futureobj.get_taskkwargs()

        logdebug("Enter OnProgressF: %s" % futureobj)

        @task(**taskkwargs)
        def UpdateParent(parentkey):
            logdebug("***************************************************")
            logdebug("Enter UpdateParent: %s" % parentkey)
            logdebug("***************************************************")

            parent = parentkey.get()
            logdebug("1: %s" % parent)
            if parent:
                logdebug("2")
                #                 if not parent.has_result():
                progress = 0
                for childfuture in get_children(parentkey):
                    logdebug("3: %s" % childfuture)
                    progress += childfuture.get_progress()
                logdebug("4: %s" % progress)
                parent.set_progress(progress)

        UpdateParent(futureobj.parentkey)
Example #4
 def runtask(*args, **kwargs):
     pickled = cloudpickle.dumps((f, args, kwargs, passthroughargs))
     logdebug("task pickle length: %s" % len(pickled))
     if get_dump():
         logdebug("f:")
         dumper(f)
         logdebug("args:")
         dumper(args)
         logdebug("kwargs:")
         dumper(kwargs)
         logdebug("passthroughargs:")
         dumper(passthroughargs)
     try:
         task = taskqueue.Task(payload=pickled, **taskkwargscopy)
         return task.add(queue, transactional=transactional)
     except taskqueue.TaskTooLargeError:
         pickledf = cloudpickle.dumps(f)
         pickleda = cloudpickle.dumps(args)
         pickledk = cloudpickle.dumps(kwargs)
         pickledp = cloudpickle.dumps(passthroughargs)
         logexception(
             "task too large, need to use datastore (%s, %s, %s, %s)" %
             (len(pickledf), len(pickleda), len(pickledk), len(pickledp)))
         if parent:
             key = _TaskToRun(data=pickled, parent=parent).put()
         else:
             key = _TaskToRun(data=pickled).put()
         rfspickled = cloudpickle.dumps((None, [key], {}, {
             "_run_from_datastore": True
         }))
         task = taskqueue.Task(payload=rfspickled, **taskkwargscopy)
         return task.add(queue, transactional=transactional)
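
The TaskTooLargeError fallback above needs a datastore model for the oversized payload and a loader on the receiving side. A plausible sketch of those pieces (the model name _TaskToRun comes from the snippet; the loader's exact shape is an assumption):

import cloudpickle
from google.appengine.ext import ndb

class _TaskToRun(ndb.Model):
    data = ndb.BlobProperty(required=True)  # the oversized pickle

def _run_from_datastore(key):
    # runs task-side: fetch the real payload, execute it, clean up
    entity = key.get()
    if entity:
        f, args, kwargs, passthroughargs = cloudpickle.loads(entity.data)
        try:
            f(*args, **kwargs)
        finally:
            key.delete()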
Example #5
 def run_task(*args, **kwargs):
     pickled = cloudpickle.dumps((f, args, kwargs, extra))
     logdebug("task pickle length: %s" % len(pickled))
     if get_dump():
         logdebug("f:")
         dumper(f)
         logdebug("args:")
         dumper(args)
         logdebug("kwargs:")
         dumper(kwargs)
         logdebug("extra:")
         dumper(extra)
     try:
         t = taskqueue.Task(payload=pickled, **task_kwargs)
         return t.add(queue, transactional=transactional)
     except taskqueue.TaskTooLargeError:
         if parent:
             key = _TaskToRun(data=pickled, parent=parent).put()
         else:
             key = _TaskToRun(data=pickled).put()
         ds_pickled = cloudpickle.dumps((None, [key], {}, {
             "_run_from_datastore": True
         }))
         t = taskqueue.Task(payload=ds_pickled, **task_kwargs)
         return t.add(queue, transactional=transactional)
Example #6
 def InvokeMap(futurekey, key, **kwargs):
     logdebug("Enter InvokeMap: %s" % key)
     try:
         obj = key.get()
         if not obj:
             raise RetryTaskException("couldn't get object for key %s" % key)
  
         return mapf(futurekey, obj, **kwargs)
     finally:
         logdebug("Leave InvokeMap: %s" % key)
Example #7
    def toplevel(futurekey, *args, **kwargs):
        logdebug("Enter futureparallel.toplevel: %s" % futurekey)

        def OnAllChildSuccess():
            logdebug("Enter OnAllChildSuccess: %s" % futurekey)
            parentfuture = futurekey.get() if futurekey else None
            if parentfuture and not parentfuture.has_result():
                if not parentfuture.initialised or not parentfuture.readyforresult:
                    raise Exception("Parent not initialised, retry")

                @ndb.transactional()
                def get_children_trans():
                    return get_children(parentfuture.key)

                children = get_children_trans()

                logdebug("children: %s" % [child.key for child in children])
                if children:
                    result = []
                    error = None
                    finished = True
                    for childfuture in children:
                        logdebug("childfuture: %s" % childfuture.key)
                        if childfuture.has_result():
                            try:
                                childresult = childfuture.get_result()
                                logdebug("childresult(%s): %s" %
                                         (childfuture.status, childresult))
                                result += [childresult]
                                logdebug("intermediate result:%s" % result)
                            except Exception as ex:
                                logdebug("haserror:%s" % repr(ex))
                                error = ex
                                break
                        else:
                            logdebug("noresult")
                            finished = False

                    if error:
                        logwarning(
                            "Internal error, child has error in OnAllChildSuccess: %s"
                            % error)
                        parentfuture.set_failure(error)
                    elif finished:
                        logdebug("result: %s" % result)
                        parentfuture.set_success(result)
                    else:
                        logdebug(
                            "child not finished in OnAllChildSuccess, skipping"
                        )
                else:
                    logwarning(
                        "Internal error, parent has no children in OnAllChildSuccess"
                    )
                    parentfuture.set_failure(Exception("no children found"))
Example #8
    def dofuturendbshardedmap(futurekey):
        logdebug(taskkwargs)
 
        linitialresult = initialresult if initialresult is not None else 0
        loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b
    
        def MapOverRange(futurekey, keyrange, weight, **kwargs):
            logdebug("Enter MapOverRange: %s" % keyrange)
            try:
                _fixkeyend(keyrange, kind)
                
                filteredquery = keyrange.filter_ndb_query(ndbquery)
                
                logdebug(filteredquery)
                 
                keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)

                if pagemapf:
                    futurename = "pagemap %s of %s" % (len(keys), keyrange)
                    lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult, loncombineresultsf)
                    future(pagemapf, parentkey=futurekey, futurename=futurename, onallchildsuccessf=lonallchildsuccessf, weight = len(keys), **taskkwargs)(keys)
                else:
                    setlocalprogress(futurekey, len(keys))

                if more and keys:
                    lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult if pagemapf else len(keys), loncombineresultsf)
                    newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction, False, keyrange.include_end)
                    krlist = newkeyrange.split_range()
                    logdebug("krlist: %s" % krlist)
                    newweight = (weight - len(keys)) / len(krlist) if weight else None
                    for kr in krlist:
                        futurename = "shard %s" % (kr)
                        future(MapOverRange, parentkey=futurekey, futurename=futurename, onallchildsuccessf = lonallchildsuccessf, weight = newweight, **taskkwargs)(kr, weight = newweight)
                if pagemapf or (more and keys):
                    raise FutureReadyForResult("still going")
                else:
                    return len(keys)
            finally:
                logdebug("Leave MapOverRange: %s" % keyrange)
  
        for kr in krlist:
            lonallchildsuccessf = GenerateOnAllChildSuccess(futurekey, linitialresult, loncombineresultsf)
            
            futurename = "shard %s" % (kr)

            newweight = weight / len(krlist) if weight else None
            future(MapOverRange, parentkey=futurekey, futurename=futurename, onallchildsuccessf=lonallchildsuccessf, weight = newweight, **taskkwargs)(kr, weight = newweight)
 
        raise FutureReadyForResult("still going")
Example #9
            def higherlevelcompose(lop, rop):
                try:
                    retval = None
                    if lop and rop:
                        blobnames = [lop.get("blobname"), rop.get("blobname")]
                        blobs = getblobsbyname(gcsbucket, *blobnames)
                        if len(blobs) == 2:
                            ltotalcomponent_count = sum(
                                [blob.component_count for blob in blobs])
                            logdebug("ltotalcomponent_count: %s" %
                                     ltotalcomponent_count)
                            if ltotalcomponent_count > 1020:
                                logdebug("doing copying")
                                newblobnames = [
                                    "%s-copy" % blobname
                                    for blobname in blobnames
                                ]
                                for ix, blob in enumerate(blobs):
                                    try:
                                        copyblob(gcsbucket, blob,
                                                 newblobnames[ix])
                                    except Exception:
                                        logexception("deleteblobs(copy)")
                                try:
                                    deleteblobs(gcsbucket, blobs)
                                except Exception:
                                    logexception("deleteblobs(copy)")

                                blobnames = newblobnames
                                blobs = getblobsbyname(gcsbucket, *blobnames)

                            if len(blobs) == 2:
                                llocalfilename = gcstargetfilename if istop else GenerateStableId(
                                    blobnames[0] + blobnames[1])
                                lfilename = "%s/%s-%s" % (gcstargetprefix,
                                                          "composed",
                                                          llocalfilename)
                                retval = composeblobs(gcsbucket, lfilename,
                                                      blobs)
                                retval["count"] = lop.get(
                                    "count", 0) + rop.get("count", 0)
                                try:
                                    deleteblobs(gcsbucket, blobs)
                                except Exception:
                                    logexception("deleteblobs")
                        else:
                            raise Exception("Can't load blobs")
                    else:
                        retval = lop if lop else rop
                    return retval
                except Exception:
                    logexception("higherlevelcompose")
                    raise
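
The helpers used above (getblobsbyname, composeblobs, copyblob, deleteblobs) are not shown. GCS compose accepts at most 32 source objects per call, and the component-count check (> 1020) appears to work around an older per-object component ceiling by copying the blobs first. A plausible composeblobs sketch using the google-cloud-storage client (an assumption; the original helper may differ):

from google.cloud import storage

def composeblobs(bucket_name, target_name, blobs):
    # compose up to 32 source blobs into a single target object;
    # the caller fills in the "count" field afterwards
    client = storage.Client()
    target = client.bucket(bucket_name).blob(target_name)
    target.compose(blobs)
    return {"blobname": target_name}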
Example #10
 def InvokeMap(key, **kwargs):
     logdebug("Enter InvokeMap: %s" % key)
     try:
         obj = key.get()
         if not obj:
             if not skipmissing:
                 raise RetryTaskException("couldn't get object for key %s" % key)
             # else just skip
         else:
             mapf(obj, **kwargs)
     finally:
         logdebug("Leave InvokeMap: %s" % key)
Example #11
    def toplevel(futurekey, *args, **kwargs):
        logdebug("Enter futuresequence.toplevel: %s" % futurekey)

        def childonsuccessforindex(index, results):
            logdebug("Enter childonsuccessforindex: %s, %s, %s" %
                     (futurekey, index, json.dumps(results, indent=2)))

            def childonsuccess(childfuturekey):
                logdebug("Enter childonsuccess: %s, %s, %s" %
                         (futurekey, index, childfuturekey))
                logdebug("results: %s" % json.dumps(results, indent=2))
                try:
                    childfuture = GetFutureAndCheckReady(childfuturekey)

                    try:
                        result = childfuture.get_result()
                    except Exception as ex:
                        toplevelfuture = futurekey.get()
                        if toplevelfuture:
                            toplevelfuture.set_failure(ex)
                        else:
                            raise Exception(
                                "Can't load toplevel future for failure")
                    else:
                        logdebug("result: %s" % json.dumps(result, indent=2))
                        newresults = results + [result]
                        islast = (index == (len(flist) - 1))

                        if islast:
                            logdebug("islast")
                            toplevelfuture = futurekey.get()
                            if toplevelfuture:
                                logdebug("setting top level success")
                                toplevelfuture.set_success_and_readyforesult(
                                    newresults)
                            else:
                                raise Exception(
                                    "Can't load toplevel future for success")
                        else:
                            logdebug("not last")
                            taskkwargs["futurename"] = "%s [%s]" % (
                                futurenameprefix if futurenameprefix else "-",
                                index + 1)
                            future(flist[index + 1],
                                   parentkey=futurekey,
                                   onsuccessf=childonsuccessforindex(
                                       index + 1, newresults),
                                   weight=weight /
                                   len(flist) if weight else None,
                                   timeoutsec=timeoutsec,
                                   maxretries=maxretries,
                                   **taskkwargs)(newresults)
Example #12
def task(f=None, **kw):
    if not f:
        return functools.partial(task, **kw)

    task_kwargs = deepcopy(kw)

    queue = task_kwargs.pop("queue", "default")
    transactional = task_kwargs.pop("transactional", False)
    parent = task_kwargs.pop("parent", None)
    include_headers = task_kwargs.pop("includeheaders", False)
    log_name = task_kwargs.pop(
        "logname", "%s/%s" %
        (getattr(f, '__module__', 'none'), getattr(f, '__name__', 'none')))

    task_kwargs["headers"] = dict(_TASKQUEUE_HEADERS)

    url = get_enqueue_url(log_name)  # _DEFAULT_ENQUEUE_URL % logname

    task_kwargs["url"] = url.lower()

    logdebug(task_kwargs)

    extra = {"includeheaders": include_headers}

    @functools.wraps(f)
    def run_task(*args, **kwargs):
        pickled = cloudpickle.dumps((f, args, kwargs, extra))
        logdebug("task pickle length: %s" % len(pickled))
        if get_dump():
            logdebug("f:")
            dumper(f)
            logdebug("args:")
            dumper(args)
            logdebug("kwargs:")
            dumper(kwargs)
            logdebug("extra:")
            dumper(extra)
        try:
            t = taskqueue.Task(payload=pickled, **task_kwargs)
            return t.add(queue, transactional=transactional)
        except taskqueue.TaskTooLargeError:
            if parent:
                key = _TaskToRun(data=pickled, parent=parent).put()
            else:
                key = _TaskToRun(data=pickled).put()
            ds_pickled = cloudpickle.dumps((None, [key], {}, {
                "_run_from_datastore": True
            }))
            t = taskqueue.Task(payload=ds_pickled, **task_kwargs)
            return t.add(queue, transactional=transactional)

    return run_task
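
Hypothetical usage of the decorator (queue name and function are illustrative). Calling the wrapped function enqueues it on the task queue instead of running it inline:

@task(queue="background", logname="reports/send")
def send_report(user_id, fmt="pdf"):
    logdebug("sending report for %s as %s" % (user_id, fmt))

send_report(42, fmt="csv")  # returns the enqueued taskqueue.Task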
Example #13
    def getvalue(*args, **kwargs):
        lcachekey = cachekey if cachekey else make_flash(f, *args, **kwargs)

        retval = memcache.get(lcachekey)  # @UndefinedVariable
        if retval is None:
            logdebug("MISS: %s" % lcachekey)
            retval = f(*args, **kwargs)
            memcache.add(key=lcachekey, value=retval,
                         time=expiresec)  # @UndefinedVariable
        else:
            logdebug("HIT: %s" % lcachekey)

        return retval
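
This getvalue is evidently the inner function of a memcache-backed memoizing decorator. A hypothetical usage, assuming the enclosing decorator is called memcacher (the name is an assumption):

@memcacher(expiresec=300)
def expensive_lookup(user_id):
    # recomputed on a MISS, then served from memcache for 5 minutes
    return do_slow_query(user_id)  # do_slow_query is a stand-in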
Example #14
    def MapOverRange(startpos, endpos, **kwargs):
        logdebug("Enter MapOverRange: %s, %s" % (startpos, endpos))

        # open file at gcspath for read
        with gcs.open(gcspath) as gcsfile:
            page, ranges = hwalk(gcsfile, pagesize, initialshards, startpos, endpos) 

        if ranges:
            for arange in ranges:
                MapOverRange(arange[0], arange[1])

        if pagemapf:
            pagemapf(page)

        logdebug("Leave MapOverRange: %s, %s" % (startpos, endpos))
Example #15
            def childonsuccess(childfuturekey):
                logdebug("Enter childonsuccess: %s, %s, %s" %
                         (futurekey, index, childfuturekey))
                logdebug("results: %s" % json.dumps(results, indent=2))
                try:
                    childfuture = GetFutureAndCheckReady(childfuturekey)

                    try:
                        result = childfuture.get_result()
                    except Exception as ex:
                        toplevelfuture = futurekey.get()
                        if toplevelfuture:
                            toplevelfuture.set_failure(ex)
                        else:
                            raise Exception(
                                "Can't load toplevel future for failure")
                    else:
Example #16
 def _set_local_progress_for_success(self):
     progressObj = self._get_progressobject()
     logdebug("progressObj = %s" % progressObj)
     weight = self.get_weight(progressObj)
     weight = weight or 1
     logdebug("weight = %s" % weight)
     localprogress = self.get_localprogress(progressObj)
     logdebug("localprogress = %s" % localprogress)
     if localprogress < weight and not self.GetChildren():
         logdebug("No children, we can auto set localprogress from weight")
         self.set_localprogress(weight)
Example #17
            def _futurewrapper(headers):
                if maxretries:
                    lretryCount = 0
                    try:
                        lretryCount = int(
                            headers.get("X-Appengine-Taskretrycount",
                                        0)) if headers else 0
                    except Exception:
                        logexception(
                            "Failed trying to get retry count, using 0")

                    if lretryCount > maxretries:
                        raise PermanentTaskFailure(
                            "Too many retries of Future")

                logdebug("inner, futurekey=%s" % futurekey)
                futureobj2 = futurekey.get()
                if futureobj2:
                    futureobj2.set_weight(weight)  # if weight >= 1 else 1)
                else:
                    raise Exception("Future not ready yet")

                try:
                    logdebug("args, kwargs=%s, %s" % (args, kwargs))
                    result = f(futurekey, *args, **kwargs)

                except FutureReadyForResult:
                    futureobj3 = futurekey.get()
                    if futureobj3:
                        futureobj3.set_readyforesult()

                except FutureNotReadyForResult:
                    futureobj4 = futurekey.get()
                    if futureobj4:
                        futureobj4.set_initialised()

                except PermanentTaskFailure as ptf:
                    try:
                        futureobj5 = futurekey.get()
                        if futureobj5:
                            futureobj5.set_failure(ptf)
                    finally:
                        raise ptf
Example #18
def _launch_task(pickled, name, headers):
    try:
        # Add some task debug information.
        logdebug(", ".join(
            ["%s:%s" % (key, value) for key, value in headers.items()]))

        if not isFromTaskQueue(headers):
            raise PermanentTaskFailure(
                'Detected an attempted XSRF attack: we are not executing from a task queue.'
            )

        logdebug('before run "%s"' % name)
        _run(pickled, headers)
        logdebug('after run "%s"' % name)
    except PermanentTaskFailure:
        logexception("Aborting task")
    except:
        logexception("failure")
        raise
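
isFromTaskQueue is not shown; a plausible implementation relies on App Engine stripping X-AppEngine-* headers from external requests, so their presence proves the request came from the task queue service:

def isFromTaskQueue(headers):
    # X-AppEngine-QueueName cannot be forged by outside callers
    return bool(headers and headers.get("X-Appengine-Queuename"))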
Example #19
    def intask(self, nameprefix, f, *args, **kwargs):
        taskkwargs = self.get_taskkwargs()
        name = ""
        if nameprefix:
            name = "%s-%s" % (nameprefix, self.key.id())
            taskkwargs["name"] = name
        elif taskkwargs.get("name"):
            del taskkwargs["name"]
        taskkwargs["transactional"] = False

        @task(**taskkwargs)
        def dof():
            f(*args, **kwargs)

        try:
            # run the wrapper task, and if it fails due to a name clash just skip it (it was already kicked off by an earlier
            # attempt to construct this future).
            #             logdebug("about to run task %s" % name)
            dof()
        except taskqueue.TombstonedTaskError:
            logdebug("skip adding task %s (already been run)" % name)
        except taskqueue.TaskAlreadyExistsError:
            logdebug("skip adding task %s (already running)" % name)
Example #20
    def MapOverRange(keyrange, **kwargs):
        logdebug("Enter MapOverRange: %s" % keyrange)
 
        _fixkeyend(keyrange, kind)
 
        filteredquery = keyrange.filter_ndb_query(ndbquery)
         
        logdebug(filteredquery)
         
        keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)
         
        if pagemapf:
            pagemapf(keys)
                     
        if more and keys:
            newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction, False, keyrange.include_end)
            krlist = newkeyrange.split_range()
            logdebug("krlist: %s" % krlist)
            for kr in krlist:
                MapOverRange(kr)
        logdebug("Leave MapOverRange: %s" % keyrange)
Example #21
    def set_localprogress(self, value):
        obj = self._get_progressobject()
        local = self.get_localprogress(obj)
        calculated = self.get_calculatedprogress(obj)
        if local != value:
            #             haschildren = self.GetChildren()
            #             logdebug("haschildren: %s" % haschildren)

            obj.localprogress = value
            logdebug("localprogress: %s" % value)
            #             if not haschildren:
            changed = value > calculated
            if changed:
                logdebug("setting calculated progress")
                obj.calculatedprogress = value

            obj.put()

            if changed:
                logdebug("kicking off calculate parent progress")
                self._calculate_parent_progress()

            self._callOnProgress()
Example #22
    def set_localprogress(self, value):
        progressobj = self._get_progressobject()
        localprogress = self.get_localprogress(progressobj)
        calculatedprogress = self.get_calculatedprogress(progressobj)
        if localprogress != value:
            #             haschildren = self.GetChildren()
            #             logdebug("haschildren: %s" % haschildren)

            progressobj.localprogress = value
            logdebug("localprogress: %s" % value)
            #             if not haschildren:
            lneedupd = value > calculatedprogress
            if lneedupd:
                logdebug("setting calculated progress")
                progressobj.calculatedprogress = value

            progressobj.put()

            if lneedupd:
                logdebug("kicking off calculate parent progress")
                self._calculate_parent_progress()

            self._callOnProgress()
Example #23
    def getvalue(*args, **kwargs):
        key = cachekey if cachekey else make_flash(f, args, kwargs)
        logdebug("Enter gcscacher.getvalue: %s" % key)

        bucket = bucketname if bucketname else os.environ.get(
            'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())

        lpicklepath = "/%s/gcscache/%s.pickle" % (bucket, key)

        logdebug("picklepath: %s" % lpicklepath)

        lsaved = None
        try:
            #1: Get the meta info
            with gcs.open(lpicklepath) as picklefile:
                lsaved = pickle.load(picklefile)
        except gcs.NotFoundError:
            pass
        
        lexpireat = lsaved.get("expireat") if lsaved else None
        lcontent = None
        lcacheIsValid = False
        if lsaved and not (lexpireat and lexpireat < get_utcnow_unixtimestampusec()):
            lcontent = lsaved.get("content")
            lcacheIsValid = True

        if not lcacheIsValid:
            logdebug("GCS Cache miss")
            lcontent = f(*args, **kwargs)
            logdebug("write content back to gcs")
            ltosave = {
                "expireat": get_utcnow_unixtimestampusec() + (expiresec * 1000000) if expiresec else None,
                "content": lcontent
            }
            with gcs.open(lpicklepath, "w") as picklefilewrite:
                cloudpickle.dump(ltosave, picklefilewrite)
        else:
            logdebug("GCS Cache hit")

        logdebug("Leave gcscacher.getvalue: %s" % key)

        return lcontent
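
Hypothetical usage of the enclosing decorator (named gcscacher per the log messages above), caching a slow computation in GCS for an hour:

@gcscacher(expiresec=3600)
def build_daily_summary(day):
    # the result is pickled to /<bucket>/gcscache/<key>.pickle and
    # reused until the expiry timestamp passes
    return compute_summary(day)  # compute_summary is a stand-in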
Example #24
            if numfiles > 32:
                ranges = CalculateFileRanges(startindex, finishindex, 2)
                logdebug("ranges:%s" % ranges)
                for r in ranges:
                    futurename = "split %s" % (r, )
                    future(GCSCombineToTarget, futurename=futurename, onallchildsuccessf=onallchildsuccessf, parentkey=futurekey, weight = r[1]-r[0], **taskkwargs)(r[0], r[1], False)
                raise FutureReadyForResult()
            else:
                lblobs = list(listbucket(gcsbucket, gcssourceprefix))[startindex:finishindex]
                lfilename = "%s/%s" % (gcstargetprefix, gcstargetfilename if istop else "composed-%s-%s" % (startindex, finishindex))
#                 lfilename = "%s/%s-%s-%s" % (gcstargetprefix, "composed", startindex, finishindex)
                retval = composeblobs(gcsbucket, lfilename, lblobs)
                return retval
        finally:
            logdebug("Leave GCSCombineToTarget: %s, %s" % (startindex, finishindex))
    
    futurename = "gcscombinetotarget %s" % (numgcsfiles)

    return future(GCSCombineToTarget, futurename=futurename, onsuccessf = onsuccessf, onfailuref = onfailuref, onprogressf = onprogressf, parentkey=parentkey, weight = numgcsfiles, **taskkwargs)(0, numgcsfiles, True)

def hwalk(afile, pagesizeinlines, numranges, startbytes, endbytes):
    ## print(afile, pagesizeinlines, numranges, startbytes, endbytes)

    page = []
    ranges = []

    if startbytes <= 0:
        #1: we're at the start of the file, just start here
        afile.seek(0, 0)
    else:
Example #25
 def ProcessPage(keys):
     for index, key in enumerate(keys):
         logdebug("Key #%s: %s" % (index, key))
         InvokeMap(key)
Example #26
def _fixkeyend(keyrange, kind):
    if keyrange.key_start and not keyrange.key_end:
        endkey = KeyRange.guess_end_key(kind, keyrange.key_start)
        if endkey and endkey > keyrange.key_start:
            logdebug("Fixing end: %s" % endkey)
            keyrange.key_end = endkey
Example #27
    def GCSCombineToTarget(futurekey, startindex, finishindex, istop, **kwargs):
        logdebug("Enter GCSCombineToTarget: %s, %s" % (startindex, finishindex))
        try:
            def higherlevelcompose(lop, rop):
                try:
                    retval = None
                    if lop and rop:
                        blobnames = [lop.get("blobname"), rop.get("blobname")]
                        blobs = getblobsbyname(gcsbucket, *blobnames)
                        if len(blobs) == 2:
                            ltotalcomponent_count = sum([blob.component_count for blob in blobs])
                            logdebug("ltotalcomponent_count: %s" % ltotalcomponent_count)
                            if ltotalcomponent_count > 1020:
                                logdebug("doing copying")
                                newblobnames = ["%s-copy" % blobname for blobname in blobnames]
                                for ix, blob in enumerate(blobs):
                                    try:
                                        copyblob(gcsbucket, blob, newblobnames[ix])
                                    except Exception:
                                        logexception("deleteblobs(copy)")
                                try:
                                    deleteblobs(gcsbucket, blobs)
                                except Exception:
                                    logexception("deleteblobs(copy)")
                                
                                blobnames = newblobnames
                                blobs = getblobsbyname(gcsbucket, *blobnames)
                                                        
                            if len(blobs) == 2:
                                llocalfilename = gcstargetfilename if istop else GenerateStableId(blobnames[0] + blobnames[1])
                                lfilename = "%s/%s-%s" % (gcstargetprefix, "composed", llocalfilename)
                                retval = composeblobs(gcsbucket, lfilename, blobs)
                                retval["count"] = lop.get("count", 0) + rop.get("count", 0)
                                try:
                                    deleteblobs(gcsbucket, blobs)
                                except Exception:
                                    logexception("deleteblobs")
                        else:
                            raise Exception("Can't load blobs")
                    else:
                        retval = lop if lop else rop
                    return retval
                except Exception:
                    logexception("higherlevelcompose")
                    raise
            
            onallchildsuccessf = GenerateOnAllChildSuccess(futurekey, None, higherlevelcompose, failonerror=False)
            
            numfiles = finishindex - startindex
            
            if numfiles > 32:
                ranges = CalculateFileRanges(startindex, finishindex, 2)
                logdebug("ranges:%s" % ranges)
                for r in ranges:
                    futurename = "split %s" % (r, )
                    future(GCSCombineToTarget, futurename=futurename, onallchildsuccessf=onallchildsuccessf, parentkey=futurekey, weight = r[1]-r[0], **taskkwargs)(r[0], r[1], False)
                raise FutureReadyForResult()
            else:
                lblobs = list(listbucket(gcsbucket, gcssourceprefix))[startindex:finishindex]
                lfilename = "%s/%s" % (gcstargetprefix, gcstargetfilename if istop else "composed-%s-%s" % (startindex, finishindex))
#                 lfilename = "%s/%s-%s-%s" % (gcstargetprefix, "composed", startindex, finishindex)
                retval = composeblobs(gcsbucket, lfilename, lblobs)
                return retval
Example #28
 def InvokeMap(futurekey, line, **kwargs):
     logdebug("Enter InvokeMap: %s" % line)
     try:
         return mapf(line, **kwargs)
     finally:
         logdebug("Leave InvokeMap: %s" % line)
Example #29
 def InvokeMap(line, **kwargs):
     logdebug("Enter InvokeMap: %s" % line)
     try:
         mapf(line, **kwargs)
     finally:
         logdebug("Leave InvokeMap: %s" % line)
Example #30
 def ProcessPage(lines):
     for index, line in enumerate(lines):
         logdebug("Line #%s: %s" % (index, line))
         InvokeMap(line)