def launch_jobs(bucket, key_names, work_fn, combine, acc, label, _type, accept_none_as_result, retry_timeouts = True): jids = cloud.map(\ lambda name: work_fn(bucket, name), key_names, _type = _type, _label= label, _env = 'compute') timed_out = [] try: progress = progressbar.ProgressBar(len(jids)).start() for (i, result) in enumerate(cloud.iresult(jids, num_in_parallel = 25)): if result is None and not accept_none_as_result: print "Job #", jids[i], key_names[i], "returned None" elif combine: # client-side reduction! Be careful about not doing too much # work here new_acc = combine(acc, result) if new_acc is not None: acc = new_acc progress.update(i+1) except KeyboardInterrupt: print "Caught keyboard interrupt, killing active workers..." cloud.kill(jids) return acc except cloud.CloudException as e: if isinstance(e.parameter, ssl.SSLError) and retry_timeouts: print "Job #", jids[i], "timed out" timed_out.append(key_names[i]) else: print "Killing workers..." cloud.kill(jids) raise except: print "Killing workers..." cloud.kill(jids) raise finally: progress.finish() if len(timed_out) > 0: return launch_jobs(bucket, key_names, work_fn, combine, acc, label, _type, accept_none_as_result, retry_timeouts = None) return acc
def check_job_itemdetail_status(self): selleritemdetailsjobs_completed = 0 newsellerlist=[] filenamelist=[] for seller in self.successsellers: if seller.get('jobcompletecheck', True) == False: newsellerlist.append(seller) else: cloud.cloud.cloudLog.critical("job failed for seller "+str(seller)) if seller.get('itemdetailjobid'):cloud.kill(seller['itemdetailjobid']) if seller.get('itemdetailjoblist'):cloud.kill(seller['itemdetailjoblist']) while selleritemdetailsjobs_completed != len(newsellerlist): try: for seller in newsellerlist: if seller.get('jobcompletecheck', True) == False: sellerjobfailedcount = 0 sellercompletedjobs = 0 for jobid in seller['itemdetailjoblist']: if cloud.status(jobid) in ('error', 'stalled', 'killed'): sellerjobfailedcount += 1 sellercompletedjobs += 1 if cloud.status(jobid) in ('done'): sellercompletedjobs += 1 if sellerjobfailedcount > SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT: update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) cloud.kill(seller['itemdetailjoblist']) selleritemdetailsjobs_completed += 1 seller['jobcompletecheck'] = True logger.exception(seller['sellerid'] + " jobs kill after two job failed") elif sellercompletedjobs >= len(seller['itemdetailjoblist']): selleritemdetailsjobs_completed += 1 seller['jobcompletecheck'] = True cloud.delete(seller['itemdetailjobid']) self.download_dumps(seller['filenamelist']) filenamelist+=seller['filenamelist'] update_sellers_status([seller], dict(phase="INDEXING", progress="ENDED")) print "Job detail wait loop" time.sleep(3) except Exception, e: print e
seller['jobcompletecheck'] = False seller['filenamelist'] = jobtempfilenames except Exception, e: update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) #cloud.join(itemdetailjobids,ignore_errors=False) successfullsellerfilenamelist=self.check_job_itemdetail_status() #self.delete_job_related_data_in_picloud(jobids, mainjobids, itemdetailjobids) except Exception,e: print e finally: for seller in self.successsellers: if cloud.status(seller['jobid']) not in ('done','error', 'stalled', 'killed'): cloud.kill(seller['jobid']) update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR")) logger.critical("Job of "+str(seller['sellerid'])+" killed although running as there were exception from client rest call") removelockfile() return successfullsellerfilenamelist def delete_job_related_data_in_picloud(self, sellerjobids, queuejobdis, itemdetailjobids): if itemdetailjobids: cloud.delete(sellerjobids) if queuejobdis: cloud.delete(queuejobdis) if itemdetailjobids: cloud.delete(itemdetailjobids) def check_job_itemdetail_status(self): selleritemdetailsjobs_completed = 0
import cloud
import time


def square(x):
    """Sleep for x seconds, then return x squared."""
    time.sleep(x)
    return x * x


# Launch one cloud job per value in 0..3.
job_ids = cloud.map(square, range(4))
# jobs have completed, kill should not have any bad effect
cloud.kill(job_ids)
def DictFromGen(GEN, elmfile, label = None, chunk_size = 10, stop_count = None):
    """Creates a dictionary of ELM frequencies from a generator.

    GEN yields sequences; elmfile names the ELM pattern file read with
    ReadELMs_nocompile.  Sequences are chunked, each chunk is counted on
    PiCloud (splitting a chunk into ever finer slices when the upload is
    too big), and the per-chunk counts are merged locally.

    Returns {(elm, matched_string): (count, count / total_count_for_elm)}.
    """
    def ChunkGen(gen, per_block, stop_count):
        # Yield successive lists of per_block sequences, stopping once
        # stop_count sequences have been taken (when stop_count is given).
        block = take(per_block, gen)
        c = per_block
        while block:
            logging.debug('yeilding block %d, len: %d' % (c, len(block)))
            yield block
            block = take(per_block, gen)
            c += per_block
            if stop_count is not None and stop_count < c:
                break

    def ProcessSeq(seq_chunk, d):
        # Runs remotely: count every (elm, matched-string) pair, including
        # overlapping hits (each search restarts one char past the last match).
        count_dict = defaultdict(int)
        elm_dict = {}
        for key, val in d.items():
            elm_dict[key] = re.compile(val)
        for i, seq in enumerate(seq_chunk):
            for elm, reg in elm_dict.items():
                m = reg.search(seq)
                while m:
                    count_dict[(elm, m.group())] += 1
                    m = reg.search(seq, m.start()+1)
        return count_dict

    elm_dict = ReadELMs_nocompile(elmfile)
    jids = []
    for chunk in ChunkGen(GEN, chunk_size, stop_count):
        submitted = False
        s = 1
        tids = []
        while not submitted:
            c = 1
            while c <= s:
                try:
                    # try to submit the current slice of the block
                    logging.debug('Submitting a chunk to the cloud')
                    jid = cloud.call(ProcessSeq, chunk[c-1::s], elm_dict, _label = label)
                    tids.append(jid)
                    c += 1
                    submitted = True
                except cloud.cloud.CloudException:
                    # if there is an exception because there is too much info
                    # then kill, delete the cloud.call and then increase the
                    # slicing
                    submitted = False
                    cloud.kill(tids)
                    cloud.delete(tids)
                    # BUG FIX: discard the killed/deleted ids -- the original
                    # kept them in tids, so jids accumulated deleted jobs and
                    # cloud.iresult(jids) below would operate on dead handles.
                    tids = []
                    logging.warning('Chunk was too big at slice: %d' % c)
                    s += 1
                    break
        jids += tids
    count_dict = defaultdict(int)
    elm_count = defaultdict(int)
    logging.warning('Waiting for the cloud')
    for i, res in enumerate(cloud.iresult(jids)):
        logging.debug('Processing result: %s' % i)
        for key, item in res.iteritems():
            elm, spec = key
            count_dict[key] += item
            elm_count[elm] += item          # per-ELM total, for the frequency
    logging.info('Deleting jobs')
    cloud.delete(jids)
    logging.info('Creating output dictionary')
    outdict = {}
    for key, count in count_dict.iteritems():
        elm, spec = key
        outdict[key] = (count, float(count) / float(elm_count[elm]))
    return outdict
import cloud


def infinite_loop():
    """Spin forever; exists only so there is a running job to kill."""
    while True:
        pass


# start a job which will never end
jid = cloud.call(infinite_loop)
# at least until you kill it
cloud.kill(jid)
def kill(self, task, *args, **kargs):
    """Kill the cloud job backing *task*, if its request carries a ticket id."""
    kwds = task.workRequest().kwds
    if PythonEval.KEY_TICKET_ID not in kwds:
        return
    cloud.kill(kwds[PythonEval.KEY_TICKET_ID])
def test_exception1():
    '''Raise TypeError since cloud.kill called with string argument'''
    cloud.kill("asdf")
def test_kill_all():
    '''Kill all running jobs when calling without argument'''
    # PEP 8: comparisons against the None singleton use 'is', not '=='.
    assert cloud.kill() is None
def test_kill():
    '''Kill specified job'''
    jid = cloud.call(infinite_loop)  # start a job which will never end
    # PEP 8: comparisons against the None singleton use 'is', not '=='.
    assert cloud.kill(jid) is None   # at least until you kill it