Example no. 1
0
 def check_job_itemdetail_status(self):  
     selleritemdetailsjobs_completed = 0
     newsellerlist=[]
     filenamelist=[]
     for seller in self.successsellers:
         if seller.get('jobcompletecheck', True) == False:
             newsellerlist.append(seller)
         else:
             cloud.cloud.cloudLog.critical("job failed for seller "+str(seller))
             if seller.get('itemdetailjobid'):cloud.kill(seller['itemdetailjobid'])
             if seller.get('itemdetailjoblist'):cloud.kill(seller['itemdetailjoblist'])
             
     while selleritemdetailsjobs_completed != len(newsellerlist):
         try:
             for seller in newsellerlist:
                 if seller.get('jobcompletecheck', True) == False:
                     sellerjobfailedcount = 0
                     sellercompletedjobs = 0
                     for jobid in seller['itemdetailjoblist']:
                         if cloud.status(jobid) in ('error', 'stalled', 'killed'):
                             sellerjobfailedcount += 1
                             sellercompletedjobs += 1
                         if cloud.status(jobid) in ('done'):
                             sellercompletedjobs += 1
                     if sellerjobfailedcount > SELLERITEMDETAIL_ALLOWED_JOB_FAIL_COUNT:
                         update_sellers_status([seller], dict(phase="INDEXING", progress="ERROR"))
                         cloud.kill(seller['itemdetailjoblist'])
                         selleritemdetailsjobs_completed += 1
                         seller['jobcompletecheck'] = True
                         logger.exception(seller['sellerid'] + " jobs kill after two job failed")
                     elif sellercompletedjobs >= len(seller['itemdetailjoblist']):
                         selleritemdetailsjobs_completed += 1
                         seller['jobcompletecheck'] = True
                         cloud.delete(seller['itemdetailjobid'])
                         self.download_dumps(seller['filenamelist'])
                         filenamelist+=seller['filenamelist']
                         update_sellers_status([seller], dict(phase="INDEXING", progress="ENDED"))
             print "Job detail wait loop"
             time.sleep(3)
         except Exception, e:
             print e
Example no. 2
0
 def delete_job_related_data_in_picloud(self, sellerjobids, queuejobdis, itemdetailjobids):
     """Delete job data from picloud for each non-empty group of job ids.

     BUG FIX: the first branch tested `itemdetailjobids` but deleted
     `sellerjobids`; each guard now tests the ids it actually deletes.
     """
     if sellerjobids:
         cloud.delete(sellerjobids)
     if queuejobdis:
         cloud.delete(queuejobdis)
     if itemdetailjobids:
         cloud.delete(itemdetailjobids)
Example no. 3
0
def ProcessFile(filename):
	"""Finds the frequencey of ELM instances in a FASTA file.

	Returns a defaultdict key-ed by ('ELM_NAME', 'ELM_SEQ') and has a value of
	counts/AA
	"""

	def ProcessSeq(inp):
		seq_chunk, d = inp
		count_dict = defaultdict(int)
		elm_dict = {}
		for key, val in d.items():
			elm_dict[key] = re.compile(val)
		for i, seq in enumerate(seq_chunk):		
			print i
			for elm, reg in elm_dict.items():
				m = reg.search(seq)
				while m:
					count_dict[(elm, m.group())] += 1
					m = reg.search(seq, m.start()+1)
		return count_dict

	def ChunkGen(filename, per_block):
		gen = SeqGen(filename)
		block = take(per_block, gen)
		c = per_block
		while block:
			#print 'yeilding block %d' % c
			yield block
			block = take(per_block, gen)
			c += per_block
			#if c > 3000: break

	label = filename.split(os.sep)[-1]
	elm_dict = ReadELMs_nocompile('elm_expressions.txt')

	count_dict = defaultdict(int)
	elm_count = defaultdict(int)
	jids = []
	for block in ChunkGen(filename, 100):
		try:
			jids.append(cloud.call(ProcessSeq, (block, elm_dict), _label=filename))
		except cloud.cloud.CloudException:
			try:
				print 'tooooo big'
				jids.append(cloud.call(ProcessSeq, (block[0::2], elm_dict), _label=filename))
				jids.append(cloud.call(ProcessSeq, (block[1::2], elm_dict), _label=filename))
			except cloud.cloud.CloudException:
				print 'really tooo big'
				jids.append(cloud.call(ProcessSeq, (block[0::3], elm_dict), _label=filename))
				jids.append(cloud.call(ProcessSeq, (block[1::3], elm_dict), _label=filename))
				jids.append(cloud.call(ProcessSeq, (block[2::3], elm_dict), _label=filename))
				
	#print str(jids)
	print 'waiting!'
	for i, res in enumerate(cloud.iresult(jids)):
		if i % 4 == 0: print 'processing result %d' % i
		for key, item in res.iteritems():
			elm, spec = key
			count_dict[key] += item
			elm_count[elm] += item
			
	cloud.delete(jids)
			
	outdict = {}
	for key, count in count_dict.iteritems():
		elm, spec = key
		outdict[key] = (count, float(count) / float(elm_count[elm]))
		
	return outdict
Example no. 4
0
import cloud
import time

def square(x):
    """Pause for x seconds, then return the square of x."""
    time.sleep(x)
    return x * x

# Run square(0..3) as four parallel picloud jobs; cloud.map returns their job ids.
jids = cloud.map(square, range(4))
# delete information about these jobs from server
# NOTE(review): the results are never fetched before deletion -- this example
# only demonstrates cleanup; confirm that discarding the results is intended.
cloud.delete(jids)
Example no. 5
0
def DictFromGen(GEN, elmfile, label = None, chunk_size = 10, stop_count = None):
	"""Creates a dictionary of ELM frequencies from a generator.

	GEN yields sequences; elmfile names the ELM regex definition file.
	Chunks of chunk_size sequences are submitted to picloud; a chunk too
	big to upload is retried in progressively more interleaved slices.
	stop_count (if given) roughly caps how many sequences are consumed.

	Returns {('ELM_NAME', 'ELM_SEQ'): (count, count / total-for-that-ELM)}.
	"""

	def ChunkGen(gen, per_block, stop_count):
		# Yield lists of at most per_block sequences, stopping once about
		# stop_count sequences have been produced (if a cap was given).
		block = take(per_block, gen)
		c = per_block
		while block:
			logging.debug('yeilding block %d, len: %d' % (c, len(block)))
			yield block
			block = take(per_block, gen)
			c += per_block
			if stop_count is not None and stop_count < c:
				break

	def ProcessSeq(seq_chunk, d):
		# Runs remotely: count every (elm, matched-text) occurrence in a
		# chunk of sequences, allowing overlapping matches.
		count_dict = defaultdict(int)
		elm_dict = {}
		for key, val in d.items():
			elm_dict[key] = re.compile(val)
		for seq in seq_chunk:
			for elm, reg in elm_dict.items():
				m = reg.search(seq)
				while m:
					count_dict[(elm, m.group())] += 1
					# restart one past the match start -> overlapping hits
					m = reg.search(seq, m.start() + 1)
		return count_dict

	elm_dict = ReadELMs_nocompile(elmfile)
	jids = []

	for chunk in ChunkGen(GEN, chunk_size, stop_count):
		submitted = False
		s = 1  # number of interleaved slices for this attempt
		while not submitted:
			# BUG FIX: tids was created once per chunk, so job ids from a
			# failed attempt -- already killed AND deleted -- leaked into
			# jids and were later passed to cloud.iresult; reset per attempt.
			tids = []
			c = 1
			while c <= s:
				try:
					# try to submit the current slice of the block
					logging.debug('Submitting a chunk to the cloud')
					jid = cloud.call(ProcessSeq, chunk[c-1::s], elm_dict, _label = label)
					tids.append(jid)
					c += 1
					submitted = True
				except cloud.cloud.CloudException:
					# Slice still too big: discard this attempt's jobs and
					# retry the whole chunk with one more slice.
					submitted = False
					cloud.kill(tids)
					cloud.delete(tids)
					logging.warning('Chunk was too big at slice: %d' % c)
					s += 1
					break
		jids += tids

	count_dict = defaultdict(int)
	elm_count = defaultdict(int)
	logging.warning('Waiting for the cloud')
	for i, res in enumerate(cloud.iresult(jids)):
		logging.debug('Processing result: %s' % i)
		for key, item in res.iteritems():
			elm, spec = key
			count_dict[key] += item
			elm_count[elm] += item

	logging.info('Deleting jobs')
	cloud.delete(jids)

	logging.info('Creating output dictionary')
	outdict = {}
	for key, count in count_dict.iteritems():
		elm, spec = key
		outdict[key] = (count, float(count) / float(elm_count[elm]))

	return outdict