def check_job_itemdetail_status(self):
    """Poll PiCloud until every seller's item-detail jobs finish, then
    download the dumps and update each seller's status.

    Requires: import cloud, time; module-level logger, update_sellers_status,
    and SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT.
    """
    selleritemdetailsjobs_completed = 0
    newsellerlist = []
    filenamelist = []
    # Partition sellers: keep the ones still waiting on jobs, kill the rest.
    for seller in self.successsellers:
        if seller.get('jobcompletecheck', True) == False:
            newsellerlist.append(seller)
        else:
            cloud.cloud.cloudLog.critical("job failed for seller " + str(seller))
            if seller.get('itemdetailjobid'):
                cloud.kill(seller['itemdetailjobid'])
            if seller.get('itemdetailjoblist'):
                cloud.kill(seller['itemdetailjoblist'])
    # Poll until every pending seller has been resolved.
    while selleritemdetailsjobs_completed != len(newsellerlist):
        try:
            for seller in newsellerlist:
                if seller.get('jobcompletecheck', True) == False:
                    sellerjobfailedcount = 0
                    sellercompletedjobs = 0
                    for jobid in seller['itemdetailjoblist']:
                        # Query each job's status once per pass.
                        status = cloud.status(jobid)
                        if status in ('error', 'stalled', 'killed'):
                            sellerjobfailedcount += 1
                            sellercompletedjobs += 1
                        elif status == 'done':
                            sellercompletedjobs += 1
                    if sellerjobfailedcount > SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT:
                        # Too many failures: kill the remaining jobs, flag an error.
                        update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR"))
                        cloud.kill(seller['itemdetailjoblist'])
                        selleritemdetailsjobs_completed += 1
                        seller['jobcompletecheck'] = True
                        logger.error(seller['sellerid'] + " jobs killed after too many job failures")
                    elif sellercompletedjobs >= len(seller['itemdetailjoblist']):
                        # All jobs finished: clean up, fetch the dumps, mark the seller done.
                        selleritemdetailsjobs_completed += 1
                        seller['jobcompletecheck'] = True
                        cloud.delete(seller['itemdetailjobid'])
                        self.download_dumps(seller['filenamelist'])
                        filenamelist += seller['filenamelist']
                        update_sellers_status([seller], dict(phase="INDEXING", progress="ENDED"))
            print "Job detail wait loop"
            time.sleep(3)
        except Exception, e:
            print e
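# A note on the polling loop above: the PiCloud client also exposes a
# blocking wait, cloud.join(jids), which can replace a hand-rolled
# cloud.status() loop when per-job failure counting is not needed.
# Minimal sketch, not the original code; the exact join semantics
# (raising cloud.CloudException when a job errors) are an assumption
# to verify against your client version.
import cloud

def wait_for_seller_jobs(seller):
    """Block until all of a seller's item-detail jobs finish (sketch)."""
    try:
        cloud.join(seller['itemdetailjoblist'])  # blocks until every job is done
        return True
    except cloud.CloudException:
        cloud.kill(seller['itemdetailjoblist'])  # stop any stragglers
        return False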
def delete_job_related_data_in_picloud(self, sellerjobids, queuejobdis, itemdetailjobids):
    """Remove all job data for the given job-id lists from PiCloud."""
    # Guard each delete so empty lists are skipped. (The original tested
    # itemdetailjobids before deleting sellerjobids, which looks like a
    # copy-paste slip; each guard now checks the list it deletes.)
    if sellerjobids:
        cloud.delete(sellerjobids)
    if queuejobdis:
        cloud.delete(queuejobdis)
    if itemdetailjobids:
        cloud.delete(itemdetailjobids)
import os
import re
from collections import defaultdict

import cloud

# SeqGen, take, and ReadELMs_nocompile are helpers defined elsewhere
# in this codebase.

def ProcessFile(filename):
    """Finds the frequency of ELM instances in a FASTA file.

    Returns a defaultdict keyed by ('ELM_NAME', 'ELM_SEQ') with a
    value of counts/AA.
    """

    def ProcessSeq(inp):
        seq_chunk, d = inp
        count_dict = defaultdict(int)
        elm_dict = {}
        for key, val in d.items():
            elm_dict[key] = re.compile(val)
        for i, seq in enumerate(seq_chunk):
            print i
            for elm, reg in elm_dict.items():
                # Count every (possibly overlapping) match of each ELM regex.
                m = reg.search(seq)
                while m:
                    count_dict[(elm, m.group())] += 1
                    m = reg.search(seq, m.start() + 1)
        return count_dict

    def ChunkGen(filename, per_block):
        gen = SeqGen(filename)
        block = take(per_block, gen)
        while block:
            yield block
            block = take(per_block, gen)

    label = filename.split(os.sep)[-1]
    elm_dict = ReadELMs_nocompile('elm_expressions.txt')
    count_dict = defaultdict(int)
    elm_count = defaultdict(int)
    jids = []
    for block in ChunkGen(filename, 100):
        try:
            # Note: jobs are labeled with the full path, not the basename
            # computed above.
            jids.append(cloud.call(ProcessSeq, (block, elm_dict), _label=filename))
        except cloud.cloud.CloudException:
            # The argument payload was too large; split the block and retry.
            try:
                print 'tooooo big'
                jids.append(cloud.call(ProcessSeq, (block[0::2], elm_dict), _label=filename))
                jids.append(cloud.call(ProcessSeq, (block[1::2], elm_dict), _label=filename))
            except cloud.cloud.CloudException:
                print 'really tooo big'
                jids.append(cloud.call(ProcessSeq, (block[0::3], elm_dict), _label=filename))
                jids.append(cloud.call(ProcessSeq, (block[1::3], elm_dict), _label=filename))
                jids.append(cloud.call(ProcessSeq, (block[2::3], elm_dict), _label=filename))
    print 'waiting!'
    for i, res in enumerate(cloud.iresult(jids)):
        if i % 4 == 0:
            print 'processing result %d' % i
        for key, item in res.iteritems():
            elm, spec = key
            count_dict[key] += item
            elm_count[elm] += item
    cloud.delete(jids)
    outdict = {}
    for key, count in count_dict.iteritems():
        elm, spec = key
        outdict[key] = (count, float(count) / float(elm_count[elm]))
    return outdict
import cloud
import time

def square(x):
    time.sleep(x)
    return x * x

jids = cloud.map(square, range(4))

# delete information about these jobs from server
cloud.delete(jids)
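# Caveat for the example above: cloud.delete() discards the stored
# results (and, per the PiCloud docs, jobs should be finished before
# deletion), so wait for and fetch the results first. A minimal sketch;
# the cloud.join()/cloud.result() list semantics are assumptions to
# check against your client version.
import cloud
import time

def square(x):
    time.sleep(x)
    return x * x

jids = cloud.map(square, range(4))
cloud.join(jids)              # wait for all four jobs to finish
results = cloud.result(jids)  # fetch results before deleting them
print results                 # expected: [0, 1, 4, 9]
cloud.delete(jids)            # now safe to drop the server-side job data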
import logging
import re
from collections import defaultdict

import cloud

# take and ReadELMs_nocompile are helpers defined elsewhere in this codebase.

def DictFromGen(GEN, elmfile, label=None, chunk_size=10, stop_count=None):
    """Creates a dictionary of ELM frequencies from a generator."""

    def ChunkGen(gen, per_block, stop_count):
        block = take(per_block, gen)
        c = per_block
        while block:
            logging.debug('yielding block %d, len: %d' % (c, len(block)))
            yield block
            block = take(per_block, gen)
            c += per_block
            if stop_count is not None and stop_count < c:
                break

    def ProcessSeq(seq_chunk, d):
        count_dict = defaultdict(int)
        elm_dict = {}
        for key, val in d.items():
            elm_dict[key] = re.compile(val)
        for seq in seq_chunk:
            for elm, reg in elm_dict.items():
                # Count every (possibly overlapping) match of each ELM regex.
                m = reg.search(seq)
                while m:
                    count_dict[(elm, m.group())] += 1
                    m = reg.search(seq, m.start() + 1)
        return count_dict

    elm_dict = ReadELMs_nocompile(elmfile)
    jids = []
    for chunk in ChunkGen(GEN, chunk_size, stop_count):
        submitted = False
        s = 1
        tids = []
        while not submitted:
            c = 1
            while c <= s:
                try:
                    # Try to submit the current slice of the block.
                    logging.debug('Submitting a chunk to the cloud')
                    jid = cloud.call(ProcessSeq, chunk[c - 1::s], elm_dict, _label=label)
                    tids.append(jid)
                    c += 1
                    submitted = True
                except cloud.cloud.CloudException:
                    # The payload was too large: kill and delete the jobs
                    # submitted so far, then retry with finer slicing.
                    # (tids must be reset, or the dead job ids would be
                    # resubmitted into jids.)
                    submitted = False
                    cloud.kill(tids)
                    cloud.delete(tids)
                    tids = []
                    logging.warning('Chunk was too big at slice: %d' % c)
                    s += 1
                    break
        jids += tids
    count_dict = defaultdict(int)
    elm_count = defaultdict(int)
    logging.warning('Waiting for the cloud')
    for i, res in enumerate(cloud.iresult(jids)):
        logging.debug('Processing result: %s' % i)
        for key, item in res.iteritems():
            elm, spec = key
            count_dict[key] += item
            elm_count[elm] += item
    logging.info('Deleting jobs')
    cloud.delete(jids)
    logging.info('Creating output dictionary')
    outdict = {}
    for key, count in count_dict.iteritems():
        elm, spec = key
        outdict[key] = (count, float(count) / float(elm_count[elm]))
    return outdict
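# For context, a hypothetical invocation of DictFromGen. SeqGen and
# elm_expressions.txt come from the surrounding codebase; the FASTA
# filename and label here are purely illustrative.
seq_gen = SeqGen('human_proteome.fasta')
freqs = DictFromGen(seq_gen, 'elm_expressions.txt',
                    label='human_proteome', chunk_size=10)
for (elm, spec), (count, frac) in freqs.iteritems():
    print elm, spec, count, frac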