def MapOverRange(futurekey, startbyte, endbyte, weight, **kwargs):
    logdebug("Enter MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

    linitialresult = initialresult if initialresult is not None else 0
    loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b

    try:
        # open file at gcspath for read
        with gcs.open(gcspath) as gcsfile:
            page, ranges = hwalk(gcsfile, pagesize, 2, startbyte, endbyte)

        if pagemapf:
            # map over this page in a child future
            lonallchildsuccessf = GenerateOnAllChildSuccess(
                futurekey, linitialresult, loncombineresultsf)
            taskkwargs["futurename"] = "pagemap %s of %s,%s" % (len(page), startbyte, endbyte)
            future(pagemapf, parentkey=futurekey,
                   onallchildsuccessf=lonallchildsuccessf,
                   weight=len(page), **taskkwargs)(page)
        else:
            setlocalprogress(futurekey, len(page))

        if ranges:
            # recurse on the remaining byte ranges, splitting the weight evenly
            newweight = (weight - len(page)) / len(ranges) if weight is not None else None
            for arange in ranges:
                taskkwargs["futurename"] = "shard %s" % (arange,)
                lonallchildsuccessf = GenerateOnAllChildSuccess(
                    futurekey,
                    linitialresult if pagemapf else len(page),
                    loncombineresultsf)
                future(MapOverRange, parentkey=futurekey,
                       onallchildsuccessf=lonallchildsuccessf,
                       weight=newweight, **taskkwargs)(arange[0], arange[1], weight=newweight)

        if ranges or pagemapf:
            # children are still running; this future's result arrives later
            raise FutureReadyForResult("still going")
        else:
            return len(page)
    finally:
        logdebug("Leave MapOverRange: %s, %s, %s" % (startbyte, endbyte, weight))

def ndbshardedpagemap(pagemapf=None, ndbquery=None, initialshards=10, pagesize=100, **taskkwargs):
    @task(**taskkwargs)
    def MapOverRange(keyrange, **kwargs):
        logdebug("Enter MapOverRange: %s" % keyrange)

        _fixkeyend(keyrange, kind)

        filteredquery = keyrange.filter_ndb_query(ndbquery)
        logdebug(filteredquery)

        keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)

        if pagemapf:
            pagemapf(keys)

        if more and keys:
            newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction,
                                   False, keyrange.include_end)
            krlist = newkeyrange.split_range()
            logdebug("krlist: %s" % krlist)
            for kr in krlist:
                MapOverRange(kr)

        logdebug("Leave MapOverRange: %s" % keyrange)

    kind = ndbquery.kind

    krlist = KeyRange.compute_split_points(kind, initialshards)
    logdebug("first krlist: %s" % krlist)

    for kr in krlist:
        MapOverRange(kr)

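# Hypothetical usage sketch for ndbshardedpagemap; the Account model and the
# logpage callback below are illustrative assumptions, not part of the
# original source.
from google.appengine.ext import ndb

class Account(ndb.Model):
    email = ndb.StringProperty()

def logpage(keys):
    # called once per page of up to pagesize keys, inside its own task
    logdebug("got a page of %s keys" % len(keys))

# fan out over all Account keys: 10 initial key-range shards, 100 keys per page
ndbshardedpagemap(pagemapf=logpage, ndbquery=Account.query(),
                  initialshards=10, pagesize=100)
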
def OnProgressF(futurekey):
    futureobj = futurekey.get() if futurekey else None
    if futureobj and futureobj.parentkey:  # guard: futureobj may be None
        taskkwargs = futureobj.get_taskkwargs()

        logdebug("Enter OnProgressF: %s" % futureobj)

        @task(**taskkwargs)
        def UpdateParent(parentkey):
            logdebug("***************************************************")
            logdebug("Enter UpdateParent: %s" % parentkey)
            logdebug("***************************************************")

            parent = parentkey.get()
            logdebug("1: %s" % parent)
            if parent:
                logdebug("2")
                # if not parent.has_result():
                # sum progress over all children and push it up to the parent
                progress = 0
                for childfuture in get_children(parentkey):
                    logdebug("3: %s" % childfuture)
                    progress += childfuture.get_progress()
                logdebug("4: %s" % progress)
                parent.set_progress(progress)

        UpdateParent(futureobj.parentkey)

def runtask(*args, **kwargs):
    pickled = cloudpickle.dumps((f, args, kwargs, passthroughargs))
    logdebug("task pickle length: %s" % len(pickled))
    if get_dump():
        logdebug("f:")
        dumper(f)
        logdebug("args:")
        dumper(args)
        logdebug("kwargs:")
        dumper(kwargs)
        logdebug("passthroughargs:")
        dumper(passthroughargs)
    try:
        task = taskqueue.Task(payload=pickled, **taskkwargscopy)
        return task.add(queue, transactional=transactional)
    except taskqueue.TaskTooLargeError:
        pickledf = cloudpickle.dumps(f)
        pickleda = cloudpickle.dumps(args)
        pickledk = cloudpickle.dumps(kwargs)
        pickledp = cloudpickle.dumps(passthroughargs)
        logexception("task too large, need to use datastore (%s, %s, %s, %s)" %
                     (len(pickledf), len(pickleda), len(pickledk), len(pickledp)))
        if parent:
            key = _TaskToRun(data=pickled, parent=parent).put()
        else:
            key = _TaskToRun(data=pickled).put()
        rfspickled = cloudpickle.dumps((None, [key], {}, {"_run_from_datastore": True}))
        task = taskqueue.Task(payload=rfspickled, **taskkwargscopy)
        return task.add(queue, transactional=transactional)

def run_task(*args, **kwargs):
    pickled = cloudpickle.dumps((f, args, kwargs, extra))
    logdebug("task pickle length: %s" % len(pickled))
    if get_dump():
        logdebug("f:")
        dumper(f)
        logdebug("args:")
        dumper(args)
        logdebug("kwargs:")
        dumper(kwargs)
        logdebug("extra:")
        dumper(extra)
    try:
        t = taskqueue.Task(payload=pickled, **task_kwargs)
        return t.add(queue, transactional=transactional)
    except taskqueue.TaskTooLargeError:
        if parent:
            key = _TaskToRun(data=pickled, parent=parent).put()
        else:
            key = _TaskToRun(data=pickled).put()
        ds_pickled = cloudpickle.dumps((None, [key], {}, {"_run_from_datastore": True}))
        t = taskqueue.Task(payload=ds_pickled, **task_kwargs)
        return t.add(queue, transactional=transactional)

def InvokeMap(futurekey, key, **kwargs):
    logdebug("Enter InvokeMap: %s" % key)
    try:
        obj = key.get()
        if not obj:
            raise RetryTaskException("couldn't get object for key %s" % key)

        return mapf(futurekey, obj, **kwargs)
    finally:
        logdebug("Leave InvokeMap: %s" % key)

def toplevel(futurekey, *args, **kwargs):
    logdebug("Enter futureparallel.toplevel: %s" % futurekey)

    def OnAllChildSuccess():
        logdebug("Enter OnAllChildSuccess: %s" % futurekey)
        parentfuture = futurekey.get() if futurekey else None
        if parentfuture and not parentfuture.has_result():
            if not parentfuture.initialised or not parentfuture.readyforresult:
                raise Exception("Parent not initialised, retry")

            @ndb.transactional()
            def get_children_trans():
                return get_children(parentfuture.key)

            children = get_children_trans()

            logdebug("children: %s" % [child.key for child in children])
            if children:
                result = []
                error = None
                finished = True
                for childfuture in children:
                    logdebug("childfuture: %s" % childfuture.key)
                    if childfuture.has_result():
                        try:
                            childresult = childfuture.get_result()
                            logdebug("childresult(%s): %s" % (childfuture.status, childresult))
                            result.append(childresult)
                            logdebug("intermediate result:%s" % result)
                        except Exception as ex:
                            logdebug("haserror:%s" % repr(ex))
                            error = ex
                            break
                    else:
                        logdebug("noresult")
                        finished = False

                if error:
                    logwarning("Internal error, child has error in OnAllChildSuccess: %s" % error)
                    parentfuture.set_failure(error)
                elif finished:
                    logdebug("result: %s" % result)
                    parentfuture.set_success(result)
                else:
                    logdebug("child not finished in OnAllChildSuccess, skipping")
            else:
                logwarning("Internal error, parent has no children in OnAllChildSuccess")
                parentfuture.set_failure(Exception("no children found"))

def dofuturendbshardedmap(futurekey):
    logdebug(taskkwargs)

    linitialresult = initialresult if initialresult is not None else 0
    loncombineresultsf = oncombineresultsf if oncombineresultsf else lambda a, b: a + b

    def MapOverRange(futurekey, keyrange, weight, **kwargs):
        logdebug("Enter MapOverRange: %s" % keyrange)
        try:
            _fixkeyend(keyrange, kind)

            filteredquery = keyrange.filter_ndb_query(ndbquery)
            logdebug(filteredquery)

            keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)

            lonallchildsuccessf = GenerateOnAllChildSuccess(
                futurekey, 0 if pagemapf else len(keys), lambda a, b: a + b)

            if pagemapf:
                # map over this page in a child future
                futurename = "pagemap %s of %s" % (len(keys), keyrange)
                lonallchildsuccessf = GenerateOnAllChildSuccess(
                    futurekey, linitialresult, loncombineresultsf)
                future(pagemapf, parentkey=futurekey, futurename=futurename,
                       onallchildsuccessf=lonallchildsuccessf,
                       weight=len(keys), **taskkwargs)(keys)
            else:
                setlocalprogress(futurekey, len(keys))

            if more and keys:
                # recurse on the remainder of the key range
                lonallchildsuccessf = GenerateOnAllChildSuccess(
                    futurekey,
                    linitialresult if pagemapf else len(keys),
                    loncombineresultsf)

                newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction,
                                       False, keyrange.include_end)
                krlist = newkeyrange.split_range()
                logdebug("krlist: %s" % krlist)

                newweight = (weight / len(krlist)) - len(keys) if weight else None

                for kr in krlist:
                    futurename = "shard %s" % (kr,)
                    future(MapOverRange, parentkey=futurekey, futurename=futurename,
                           onallchildsuccessf=lonallchildsuccessf,
                           weight=newweight, **taskkwargs)(kr, weight=newweight)

            if pagemapf or (more and keys):
                # if (more and keys):
                raise FutureReadyForResult("still going")
            else:
                return len(keys)  # (len(keys), 0, keyrange)
                # return len(keys)
        finally:
            logdebug("Leave MapOverRange: %s" % keyrange)

    for kr in krlist:
        lonallchildsuccessf = GenerateOnAllChildSuccess(
            futurekey, linitialresult, loncombineresultsf)
        futurename = "shard %s" % (kr,)
        newweight = weight / len(krlist) if weight else None
        future(MapOverRange, parentkey=futurekey, futurename=futurename,
               onallchildsuccessf=lonallchildsuccessf,
               weight=newweight, **taskkwargs)(kr, weight=newweight)

    raise FutureReadyForResult("still going")

def higherlevelcompose(lop, rop):
    try:
        retval = None
        if lop and rop:
            blobnames = [lop.get("blobname"), rop.get("blobname")]
            blobs = getblobsbyname(gcsbucket, *blobnames)
            if len(blobs) == 2:
                ltotalcomponent_count = sum([blob.component_count for blob in blobs])
                logdebug("ltotalcomponent_count: %s" % ltotalcomponent_count)

                # GCS caps composite component counts; copy first to flatten
                if ltotalcomponent_count > 1020:
                    logdebug("doing copying")
                    newblobnames = ["%s-copy" % blobname for blobname in blobnames]
                    for ix, blob in enumerate(blobs):
                        try:
                            copyblob(gcsbucket, blob, newblobnames[ix])
                        except Exception:
                            logexception("deleteblobs(copy)")
                    try:
                        deleteblobs(gcsbucket, blobs)
                    except Exception:
                        logexception("deleteblobs(copy)")

                    blobnames = newblobnames
                    blobs = getblobsbyname(gcsbucket, *blobnames)

            if len(blobs) == 2:
                llocalfilename = gcstargetfilename if istop else GenerateStableId(blobnames[0] + blobnames[1])
                lfilename = "%s/%s-%s" % (gcstargetprefix, "composed", llocalfilename)
                retval = composeblobs(gcsbucket, lfilename, blobs)
                retval["count"] = lop.get("count", 0) + rop.get("count", 0)
                try:
                    deleteblobs(gcsbucket, blobs)
                except Exception:
                    logexception("deleteblobs")
            else:
                raise Exception("Can't load blobs")
        else:
            retval = lop if lop else rop
        return retval
    except Exception as ex:
        logexception("higherlevelcompose")
        raise ex

def InvokeMap(key, **kwargs):
    logdebug("Enter InvokeMap: %s" % key)
    try:
        obj = key.get()
        if not obj:
            if not skipmissing:
                raise RetryTaskException("couldn't get object for key %s" % key)
            # else just skip
        else:
            mapf(obj, **kwargs)
    finally:
        logdebug("Leave InvokeMap: %s" % key)

def toplevel(futurekey, *args, **kwargs):
    logdebug("Enter futuresequence.toplevel: %s" % futurekey)

    def childonsuccessforindex(index, results):
        logdebug("Enter childonsuccessforindex: %s, %s, %s" %
                 (futurekey, index, json.dumps(results, indent=2)))

        def childonsuccess(childfuturekey):
            logdebug("Enter childonsuccess: %s, %s, %s" % (futurekey, index, childfuturekey))
            logdebug("results: %s" % json.dumps(results, indent=2))
            try:
                childfuture = GetFutureAndCheckReady(childfuturekey)

                try:
                    result = childfuture.get_result()
                except Exception as ex:
                    toplevelfuture = futurekey.get()
                    if toplevelfuture:
                        toplevelfuture.set_failure(ex)
                    else:
                        raise Exception("Can't load toplevel future for failure")
                else:
                    logdebug("result: %s" % json.dumps(result, indent=2))
                    newresults = results + [result]
                    islast = (index == (len(flist) - 1))
                    if islast:
                        logdebug("islast")
                        toplevelfuture = futurekey.get()
                        if toplevelfuture:
                            logdebug("setting top level success")
                            toplevelfuture.set_success_and_readyforesult(newresults)
                        else:
                            raise Exception("Can't load toplevel future for success")
                    else:
                        logdebug("not last")
                        # kick off the next function in the sequence, passing
                        # the accumulated results forward
                        taskkwargs["futurename"] = "%s [%s]" % (
                            futurenameprefix if futurenameprefix else "-", index + 1)
                        future(flist[index + 1], parentkey=futurekey,
                               onsuccessf=childonsuccessforindex(index + 1, newresults),
                               weight=weight / len(flist) if weight else None,
                               timeoutsec=timeoutsec, maxretries=maxretries,
                               **taskkwargs)(newresults)
            finally:
                logdebug("Leave childonsuccess: %s, %s, %s" % (futurekey, index, childfuturekey))

        return childonsuccess

def task(f=None, **kw):
    if not f:
        return functools.partial(task, **kw)

    task_kwargs = deepcopy(kw)

    queue = task_kwargs.pop("queue", "default")
    transactional = task_kwargs.pop("transactional", False)
    parent = task_kwargs.pop("parent", None)
    include_headers = task_kwargs.pop("includeheaders", False)
    log_name = task_kwargs.pop(
        "logname",
        "%s/%s" % (getattr(f, '__module__', 'none'), getattr(f, '__name__', 'none')))

    task_kwargs["headers"] = dict(_TASKQUEUE_HEADERS)

    url = get_enqueue_url(log_name)  # _DEFAULT_ENQUEUE_URL % logname
    task_kwargs["url"] = url.lower()
    logdebug(task_kwargs)

    extra = {"includeheaders": include_headers}

    @functools.wraps(f)
    def run_task(*args, **kwargs):
        pickled = cloudpickle.dumps((f, args, kwargs, extra))
        logdebug("task pickle length: %s" % len(pickled))
        if get_dump():
            logdebug("f:")
            dumper(f)
            logdebug("args:")
            dumper(args)
            logdebug("kwargs:")
            dumper(kwargs)
            logdebug("extra:")
            dumper(extra)
        try:
            t = taskqueue.Task(payload=pickled, **task_kwargs)
            return t.add(queue, transactional=transactional)
        except taskqueue.TaskTooLargeError:
            # payload too big for the task queue: park it in the datastore and
            # enqueue a small stub that fetches it back out at run time
            if parent:
                key = _TaskToRun(data=pickled, parent=parent).put()
            else:
                key = _TaskToRun(data=pickled).put()
            ds_pickled = cloudpickle.dumps((None, [key], {}, {"_run_from_datastore": True}))
            t = taskqueue.Task(payload=ds_pickled, **task_kwargs)
            return t.add(queue, transactional=transactional)

    return run_task

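# A minimal sketch of applying the @task decorator above; the queue name and
# the send_welcome function are illustrative assumptions.
@task(queue="background")
def send_welcome(email):
    logdebug("sending welcome mail to %s" % email)

# Calling the wrapped function enqueues it: the function plus its arguments
# are cloudpickled into the task payload and executed later by the handler
# behind get_enqueue_url.
send_welcome("user@example.com")
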
def getvalue(*args, **kwargs):
    lcachekey = cachekey if cachekey else make_flash(f, *args, **kwargs)

    retval = memcache.get(lcachekey)  # @UndefinedVariable
    if retval is None:
        logdebug("MISS: %s" % lcachekey)
        retval = f(*args, **kwargs)
        memcache.add(key=lcachekey, value=retval, time=expiresec)  # @UndefinedVariable
    else:
        logdebug("HIT: %s" % lcachekey)

    return retval

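# getvalue above is the inner closure of a memcache-backed caching decorator;
# this is a hedged sketch of how the enclosing decorator might be applied.
# The decorator name "memcacher" and the decorated function are assumptions.
@memcacher(cachekey="expensive-report", expiresec=300)
def build_report():
    return run_expensive_query()  # hypothetical slow computation

report = build_report()  # first call logs MISS, computes, stores for 300s
report = build_report()  # second call within 300s logs HIT
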
def MapOverRange(startpos, endpos, **kwargs):
    logdebug("Enter MapOverRange: %s, %s" % (startpos, endpos))

    # open file at gcspath for read
    with gcs.open(gcspath) as gcsfile:
        page, ranges = hwalk(gcsfile, pagesize, initialshards, startpos, endpos)

    if ranges:
        for arange in ranges:
            MapOverRange(arange[0], arange[1])

    if pagemapf:
        pagemapf(page)

    logdebug("Leave MapOverRange: %s, %s" % (startpos, endpos))

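# A hypothetical driver for the recursive walker above, assuming gcspath,
# pagesize, initialshards and pagemapf are bound in an enclosing function;
# gcs.stat supplies the file length so the first call spans the whole file.
filesize = gcs.stat(gcspath).st_size
MapOverRange(0, filesize)
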
def _set_local_progress_for_success(self):
    progressObj = self._get_progressobject()
    logdebug("progressObj = %s" % progressObj)

    weight = self.get_weight(progressObj)
    weight = weight or 1
    logdebug("weight = %s" % weight)

    localprogress = self.get_localprogress(progressObj)
    logdebug("localprogress = %s" % localprogress)
    if localprogress < weight and not self.GetChildren():
        logdebug("No children, we can auto set localprogress from weight")
        self.set_localprogress(weight)

def _futurewrapper(headers):
    if maxretries:
        lretryCount = 0
        try:
            lretryCount = int(headers.get("X-Appengine-Taskretrycount", 0)) if headers else 0
        except:
            logexception("Failed trying to get retry count, using 0")

        if lretryCount > maxretries:
            raise PermanentTaskFailure("Too many retries of Future")

    logdebug("inner, futurekey=%s" % futurekey)
    futureobj2 = futurekey.get()
    if futureobj2:
        futureobj2.set_weight(weight)  # if weight >= 1 else 1)
    else:
        raise Exception("Future not ready yet")

    try:
        logdebug("args, kwargs=%s, %s" % (args, kwargs))
        result = f(futurekey, *args, **kwargs)
    except FutureReadyForResult:
        futureobj3 = futurekey.get()
        if futureobj3:
            futureobj3.set_readyforesult()
    except FutureNotReadyForResult:
        futureobj4 = futurekey.get()
        if futureobj4:
            futureobj4.set_initialised()
    except PermanentTaskFailure as ptf:
        try:
            futureobj5 = futurekey.get()
            if futureobj5:
                futureobj5.set_failure(ptf)
        finally:
            raise ptf

def _launch_task(pickled, name, headers):
    try:
        # Add some task debug information.
        # dheaders = []
        # for key, value in headers.items():
        #     k = key.lower()
        #     if k.startswith("x-appengine-") and k not in _SKIP_HEADERS:
        #         dheaders.append("%s:%s" % (key, value))
        # logdebug(", ".join(dheaders))
        logdebug(", ".join(["%s:%s" % (key, value) for key, value in headers.items()]))

        if not isFromTaskQueue(headers):
            raise PermanentTaskFailure(
                'Detected an attempted XSRF attack: we are not executing from a task queue.')

        logdebug('before run "%s"' % name)
        _run(pickled, headers)
        logdebug('after run "%s"' % name)
    except PermanentTaskFailure:
        logexception("Aborting task")
    except:
        logexception("failure")
        raise

def intask(self, nameprefix, f, *args, **kwargs):
    taskkwargs = self.get_taskkwargs()

    name = ""
    if nameprefix:
        name = "%s-%s" % (nameprefix, self.key.id())
        taskkwargs["name"] = name
    elif taskkwargs.get("name"):
        del taskkwargs["name"]
    taskkwargs["transactional"] = False

    @task(**taskkwargs)
    def dof():
        f(*args, **kwargs)

    try:
        # run the wrapper task, and if it fails due to a name clash just skip
        # it (it was already kicked off by an earlier attempt to construct
        # this future).
        logdebug("about to run task %s" % name)
        dof()
    except taskqueue.TombstonedTaskError:
        logdebug("skip adding task %s (already been run)" % name)
    except taskqueue.TaskAlreadyExistsError:
        logdebug("skip adding task %s (already running)" % name)

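# A hedged sketch of calling intask from another method of the same class;
# notify_watchers and its body are illustrative assumptions. Because the
# task name is derived from nameprefix plus the future's key id, a retried
# caller cannot enqueue a second copy of the same work.
def notify_watchers(self):
    def donotify():
        logdebug("notifying watchers of %s" % self.key)
    self.intask("notify-watchers", donotify)
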
def MapOverRange(keyrange, **kwargs):
    logdebug("Enter MapOverRange: %s" % keyrange)

    _fixkeyend(keyrange, kind)

    filteredquery = keyrange.filter_ndb_query(ndbquery)
    logdebug(filteredquery)

    keys, _, more = filteredquery.fetch_page(pagesize, keys_only=True)

    if pagemapf:
        pagemapf(keys)

    if more and keys:
        newkeyrange = KeyRange(keys[-1], keyrange.key_end, keyrange.direction,
                               False, keyrange.include_end)
        krlist = newkeyrange.split_range()
        logdebug("krlist: %s" % krlist)
        for kr in krlist:
            MapOverRange(kr)

    logdebug("Leave MapOverRange: %s" % keyrange)

def set_localprogress(self, value):
    obj = self._get_progressobject()
    local = self.get_localprogress(obj)
    calculated = self.get_calculatedprogress(obj)
    if local != value:
        # haschildren = self.GetChildren()
        # logdebug("haschildren: %s" % haschildren)
        obj.localprogress = value
        logdebug("localprogress: %s" % value)
        # if not haschildren:
        changed = value > calculated
        if changed:
            logdebug("setting calculated progress")
            obj.calculatedprogress = value

        obj.put()

        if changed:
            logdebug("kicking off calculate parent progress")
            self._calculate_parent_progress()

        self._callOnProgress()

def set_localprogress(self, value):
    progressobj = self._get_progressobject()
    localprogress = self.get_localprogress(progressobj)
    calculatedprogress = self.get_calculatedprogress(progressobj)
    if localprogress != value:
        # haschildren = self.GetChildren()
        # logdebug("haschildren: %s" % haschildren)
        progressobj.localprogress = value
        logdebug("localprogress: %s" % value)
        # if not haschildren:
        lneedupd = value > calculatedprogress
        if lneedupd:
            logdebug("setting calculated progress")
            progressobj.calculatedprogress = value

        progressobj.put()

        if lneedupd:
            logdebug("kicking off calculate parent progress")
            self._calculate_parent_progress()

        self._callOnProgress()

def getvalue(*args, **kwargs):
    key = cachekey if cachekey else make_flash(f, args, kwargs)
    logdebug("Enter gcscacher.getvalue: %s" % key)

    bucket = bucketname if bucketname else os.environ.get(
        'BUCKET_NAME', app_identity.get_default_gcs_bucket_name())
    lpicklepath = "/%s/gcscache/%s.pickle" % (bucket, key)
    logdebug("picklepath: %s" % lpicklepath)

    lsaved = None
    try:
        # 1: Get the meta info
        with gcs.open(lpicklepath) as picklefile:
            lsaved = pickle.load(picklefile)
    except gcs.NotFoundError:
        pass

    lexpireat = lsaved.get("expireat") if lsaved else None

    lcontent = None
    lcacheIsValid = False
    if lsaved and not (lexpireat and lexpireat < get_utcnow_unixtimestampusec()):
        lcontent = lsaved.get("content")
        lcacheIsValid = True

    if not lcacheIsValid:
        logdebug("GCS Cache miss")
        lcontent = f(*args, **kwargs)

        logdebug("write content back to gcs")
        ltosave = {
            "expireat": get_utcnow_unixtimestampusec() + (expiresec * 1000000)
                        if expiresec else None,
            "content": lcontent
        }
        with gcs.open(lpicklepath, "w") as picklefilewrite:
            cloudpickle.dump(ltosave, picklefilewrite)
    else:
        logdebug("GCS Cache hit")

    logdebug("Leave gcscacher.getvalue: %s" % key)
    return lcontent

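# A hedged usage sketch for the GCS-backed cache above, assuming getvalue is
# the inner function of a decorator named gcscacher (the name matches the log
# messages; the parameters and decorated function are assumptions).
@gcscacher(expiresec=3600)
def aggregate_totals():
    return run_expensive_aggregation()  # hypothetical slow computation

totals = aggregate_totals()  # miss: computes, pickles the result into GCS
totals = aggregate_totals()  # hit within an hour: unpickles straight from GCS
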
    if numfiles > 32:
        ranges = CalculateFileRanges(startindex, finishindex, 2)
        logdebug("ranges:%s" % ranges)
        for r in ranges:
            futurename = "split %s" % (r,)
            future(GCSCombineToTarget, futurename=futurename,
                   onallchildsuccessf=onallchildsuccessf,
                   parentkey=futurekey, weight=r[1] - r[0],
                   **taskkwargs)(r[0], r[1], False)
        raise FutureReadyForResult()
    else:
        lblobs = list(listbucket(gcsbucket, gcssourceprefix))[startindex:finishindex]
        lfilename = "%s/%s" % (gcstargetprefix,
                               gcstargetfilename if istop
                               else "composed-%s-%s" % (startindex, finishindex))
        # lfilename = "%s/%s-%s-%s" % (gcstargetprefix, "composed", startindex, finishindex)
        retval = composeblobs(gcsbucket, lfilename, lblobs)
        return retval
finally:
    logdebug("Leave GCSCombineToTarget: %s, %s" % (startindex, finishindex))

futurename = "gcscombinetotarget %s" % (numgcsfiles,)

return future(GCSCombineToTarget, futurename=futurename,
              onsuccessf=onsuccessf, onfailuref=onfailuref,
              onprogressf=onprogressf, parentkey=parentkey,
              weight=numgcsfiles, **taskkwargs)(0, numgcsfiles, True)


def hwalk(afile, pagesizeinlines, numranges, startbytes, endbytes):
    # print(afile, pagesizeinlines, numranges, startbytes, endbytes)
    page = []
    ranges = []

    if startbytes <= 0:
        # 1: we're at the start of the file, just start here
        afile.seek(0, 0)
    else:

def ProcessPage(keys):
    for index, key in enumerate(keys):
        logdebug("Key #%s: %s" % (index, key))
        InvokeMap(key)

def _fixkeyend(keyrange, kind):
    if keyrange.key_start and not keyrange.key_end:
        endkey = KeyRange.guess_end_key(kind, keyrange.key_start)
        if endkey and endkey > keyrange.key_start:
            logdebug("Fixing end: %s" % endkey)
            keyrange.key_end = endkey

def GCSCombineToTarget(futurekey, startindex, finishindex, istop, **kwargs):
    logdebug("Enter GCSCombineToTarget: %s, %s" % (startindex, finishindex))
    try:
        def higherlevelcompose(lop, rop):
            try:
                retval = None
                if lop and rop:
                    blobnames = [lop.get("blobname"), rop.get("blobname")]
                    blobs = getblobsbyname(gcsbucket, *blobnames)
                    if len(blobs) == 2:
                        ltotalcomponent_count = sum([blob.component_count for blob in blobs])
                        logdebug("ltotalcomponent_count: %s" % ltotalcomponent_count)

                        # GCS caps composite component counts; copy first to flatten
                        if ltotalcomponent_count > 1020:
                            logdebug("doing copying")
                            newblobnames = ["%s-copy" % blobname for blobname in blobnames]
                            for ix, blob in enumerate(blobs):
                                try:
                                    copyblob(gcsbucket, blob, newblobnames[ix])
                                except Exception:
                                    logexception("deleteblobs(copy)")
                            try:
                                deleteblobs(gcsbucket, blobs)
                            except Exception:
                                logexception("deleteblobs(copy)")

                            blobnames = newblobnames
                            blobs = getblobsbyname(gcsbucket, *blobnames)

                    if len(blobs) == 2:
                        llocalfilename = gcstargetfilename if istop else GenerateStableId(blobnames[0] + blobnames[1])
                        lfilename = "%s/%s-%s" % (gcstargetprefix, "composed", llocalfilename)
                        retval = composeblobs(gcsbucket, lfilename, blobs)
                        retval["count"] = lop.get("count", 0) + rop.get("count", 0)
                        try:
                            deleteblobs(gcsbucket, blobs)
                        except Exception:
                            logexception("deleteblobs")
                    else:
                        raise Exception("Can't load blobs")
                else:
                    retval = lop if lop else rop
                return retval
            except Exception as ex:
                logexception("higherlevelcompose")
                raise ex

        onallchildsuccessf = GenerateOnAllChildSuccess(futurekey, None,
                                                       higherlevelcompose,
                                                       failonerror=False)

        numfiles = finishindex - startindex

        if numfiles > 32:
            ranges = CalculateFileRanges(startindex, finishindex, 2)
            logdebug("ranges:%s" % ranges)
            for r in ranges:
                futurename = "split %s" % (r,)
                future(GCSCombineToTarget, futurename=futurename,
                       onallchildsuccessf=onallchildsuccessf,
                       parentkey=futurekey, weight=r[1] - r[0],
                       **taskkwargs)(r[0], r[1], False)
            raise FutureReadyForResult()
        else:
            lblobs = list(listbucket(gcsbucket, gcssourceprefix))[startindex:finishindex]
            lfilename = "%s/%s" % (gcstargetprefix,
                                   gcstargetfilename if istop
                                   else "composed-%s-%s" % (startindex, finishindex))
            # lfilename = "%s/%s-%s-%s" % (gcstargetprefix, "composed", startindex, finishindex)
            retval = composeblobs(gcsbucket, lfilename, lblobs)
            return retval

def InvokeMap(futurekey, line, **kwargs):
    logdebug("Enter InvokeMap: %s" % line)
    try:
        return mapf(line, **kwargs)
    finally:
        logdebug("Leave InvokeMap: %s" % line)

def InvokeMap(line, **kwargs):
    logdebug("Enter InvokeMap: %s" % line)
    try:
        mapf(line, **kwargs)
    finally:
        logdebug("Leave InvokeMap: %s" % line)

def ProcessPage(lines):
    for index, line in enumerate(lines):
        logdebug("Line #%s: %s" % (index, line))
        InvokeMap(line)