Example #1
    # Requires: import time; from tqdm import tqdm;
    # from celery.result import ResultSet
    def run(self, callable, data):
        # Clear any stale tasks left in the queue before submitting
        self.clear_tasks()
        time.sleep(1)

        # Create all distributed tasks in the queue
        print("Creating tasks")
        tasks = [callable.delay(datum) for datum in data]
        t = tqdm(total=len(tasks), unit="task")
        results = ResultSet(tasks, app=self.app)

        start_time = time.time()

        # Wait for all distributed tasks to finish
        last_completed = 0
        while True:
            if time.time() - start_time > 3600:  # Once an hour...
                start_time = time.time()
                self.spawn_workers()  # ...restart all workers

            try:
                if results.ready():
                    break
                completed = results.completed_count()
                t.update(completed - last_completed)
                last_completed = completed
            except Exception:
                # Result backend hiccup: back off, then retry the poll
                time.sleep(10)
                continue

            time.sleep(1)

        # Flush any remaining progress and close the bar
        t.update(results.completed_count() - last_completed)
        t.close()

        return self
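
A minimal driver sketch for the method above, assuming a Redis broker and a hypothetical square task; DistributedRunner and its constructor are stand-ins for the surrounding class, which this excerpt does not show:

from celery import Celery

# Hypothetical wiring for the runner above; the broker/backend URLs and
# the task body are illustrative, not taken from the original project.
app = Celery('demo', broker='redis://localhost:6379/0',
             backend='redis://localhost:6379/0')

@app.task
def square(x):
    return x * x

# runner = DistributedRunner(app)   # hypothetical constructor
# runner.run(square, range(1000))   # blocks until every task completes
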
Example #2
File: views.py Project: velsa/mdbox
def get_result(request):
	ret = {'status': 'error', 'result': '', 'messages': ['']}
	if request.method == 'POST' and request.user:
		try:
			user = MDBUser.objects.get(username=request.user.username)
		except Exception as e:
			ret['messages'][0] = "<strong>FATAL</strong>(get_result.user): %s" % e
		else:
			# Note: this is NOT the status of the tasks; 'success' here means
			# that the get_result() request itself was processed correctly
			ret['status'] = 'success'
			async_res = AsyncResult(request.POST['task_id'])
			if async_res.ready():
				# Get all subtask ids spawned by the parent task
				subtasks = ust_get_ids(user)
				# Create a list of AsyncResults from the list of task ids,
				# and a ResultSet over them for aggregate queries
				async_results = [AsyncResult(task_id) for task_id in subtasks]
				async_res_set = ResultSet(async_results)
				ret['messages'][0] = 'parent task %s: %d of %d subtasks completed' %\
									 (request.POST['task_id'][:8],
									  async_res_set.completed_count(),
									  async_res_set.total,
									 )
				# All tasks completed?
				if async_res_set.ready():
					# All tasks are done; forget about their task ids
					ust_clear_ids(user)
					# Did any of them fail?
					if async_res_set.failed():
						ret['result'] = 'FAILURE'
						for async_res in async_results:
							if async_res.state == 'FAILURE':
								ret['messages'].append(
									"<strong>ERROR</strong>(get_result.FAILURE): '%s':'%s'"
									% (async_res.task_id[:8], async_res.result))
					else:
						ret['result'] = 'SUCCESS'
				else:
					ret['result'] = 'PENDING'
			else:
				ret['result'] = 'PENDING'
				ret['messages'][0] = 'parent task %s: PENDING' % \
					(request.POST['task_id'], )
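
The core pattern in this view is rebuilding per-task handles from persisted task ids so progress can be polled across requests. A minimal sketch of that pattern in isolation, where task_ids and the app argument are assumptions about how the ids were stored:

from celery.result import AsyncResult, ResultSet

def progress_from_ids(task_ids, app=None):
    # Rebuild AsyncResult handles from persisted task ids, then aggregate
    # them in a ResultSet so overall progress can be queried in one place.
    res_set = ResultSet([AsyncResult(tid, app=app) for tid in task_ids])
    return res_set.completed_count(), res_set.total, res_set.ready()
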
Example #3
    # sponsored = train.loc[train['file'] == openfile]
    # if not sponsored.empty:
    #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))
    # testing = sample.loc[sample['file'] == openfile]
    # if not testing.empty:
    #     result.add(processFile.delay(openfile, data, int(sponsored['sponsored'])))

    bar.numerator = k
    print("Sending out processes ", bar, end='\r')
    sys.stdout.flush()

bar = ProgressBar(len(train) + len(test_files), max_width=40)
while not result.ready():
    time.sleep(5)
    bar.numerator = result.completed_count()
    print("Waiting for return results ", bar, end='\r')
    sys.stdout.flush()

results = result.join()  # wait for all jobs to finish and collect return values

df_full = pd.DataFrame(list(results))

print('--- Training random forest')
clf = RandomForestClassifier(n_estimators=150, n_jobs=-1, random_state=0)
train_data = df_full[df_full.sponsored.notnull()].fillna(0)
test = df_full[df_full.sponsored.isnull()
               & df_full.file.isin(test_files)].fillna(0)
clf.fit(train_data.drop(['file', 'sponsored'], axis=1), train_data.sponsored)

print('--- Create predictions and submission')
submission = test[['file']].reset_index(drop=True)
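
The listing ends just after the submission frame is created; a plausible continuation under the usual predict-and-save pattern (the column name and output filename here are assumptions, not from the original script):

# Hypothetical continuation: score the held-out files and write the
# submission CSV; 'sponsored' mirrors the training column name.
submission['sponsored'] = clf.predict_proba(
    test.drop(['file', 'sponsored'], axis=1))[:, 1]
submission.to_csv('submission.csv', index=False)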